View Javadoc
1   package wjhk.jupload2.upload.helper;
2   
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PushbackInputStream;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import wjhk.jupload2.exception.JUploadEOFException;
import wjhk.jupload2.exception.JUploadException;
import wjhk.jupload2.exception.JUploadIOException;
import wjhk.jupload2.policies.UploadPolicy;
12  
13  /**
14   * A helper, to read the response coming from the server.
15   * 
16   * @author etienne_sf
17   */
18  public class HTTPInputStreamReader {
19      // //////////////////////////////////////////////////////////////////////////////
20      // //////////////////// Main attributes
21      // //////////////////////////////////////////////////////////////////////////////
22  
23      /**
24       * The current upload policy, always useful.
25       */
26      private UploadPolicy uploadPolicy = null;
27  
28      private HTTPConnectionHelper httpConnectionHelper = null;
29  
30      /**
31       * Contains the HTTP response bytearrayResponseBody, that is: the server
32       * response, without the headers.
33       */
34      String stringResponseBody = null;
35  
36      private byte[] bytearrayResponseBody = new byte[0];
37  
38      /**
39       * The headers of the HTTP response.
40       */
41      String responseHeaders = null;
42  
43      /**
44       * The status message from the first line of the response (e.g. "200 OK").
45       */
46      String responseMsg = null;
47  
48      // ////////////////////////////////////////////////////////////////////////////////////
49      // /////////////////// ATTRIBUTE CONTAINING DATA COMING FROM THE RESPONSE
50      // ////////////////////////////////////////////////////////////////////////////////////
51  
52      private CookieJar cookies = null;
53  
54      boolean gotClose = false;
55  
56      private boolean gotChunked = false;
57  
58      private boolean gotContentLength = false;
59  
60      private int clen = 0;
61  
62      /**
63       * The server HTTP response. Should be 200, in case of success.
64       */
65      private int httpStatusCode = 0;
66  
67      private String line = "";
68  
69      private String charset = "ISO-8859-1";
70  
71      // ////////////////////////////////////////////////////////////////////////////////////
72      // /////////////////// CONSTANTS USED TO CONTROL THE HTTP INTPUT
73      // ////////////////////////////////////////////////////////////////////////////////////
74      private final static int CHUNKBUF_SIZE = 4096;
75  
76      private final byte chunkbuf[] = new byte[CHUNKBUF_SIZE];
77  
78      private final static Pattern pChunked = Pattern.compile(
79              "^Transfer-Encoding:\\s+chunked", Pattern.CASE_INSENSITIVE);
80  
81      private final static Pattern pClose = Pattern.compile(
82              "^Connection:\\s+close", Pattern.CASE_INSENSITIVE);
83  
84      private final static Pattern pProxyClose = Pattern.compile(
85              "^Proxy-Connection:\\s+close", Pattern.CASE_INSENSITIVE);
86  
87      private final static Pattern pHttpStatus = Pattern
88              .compile("^HTTP/\\d\\.\\d\\s+((\\d+)\\s+.*)$");
89  
90      private final static Pattern pContentLen = Pattern.compile(
91              "^Content-Length:\\s+(\\d+)$", Pattern.CASE_INSENSITIVE);
92  
93      private final static Pattern pContentTypeCs = Pattern.compile(
94              "^Content-Type:\\s+.*;\\s*charset=([^;\\s]+).*$",
95              Pattern.CASE_INSENSITIVE);
96  
97      private final static Pattern pSetCookie = Pattern.compile(
98              "^Set-Cookie:\\s+(.*)$", Pattern.CASE_INSENSITIVE);
99  
100     /**
101      * The standard constructor: does nothing ! Oh yes, it initialize some
102      * attribute from the given parameter... :-)
103      * 
104      * @param httpConnectionHelper The connection helper, associated with this
105      *            instance.
106      * @param uploadPolicy The current upload policy.
107      */
108     public HTTPInputStreamReader(HTTPConnectionHelper httpConnectionHelper,
109             UploadPolicy uploadPolicy) {
110         this.httpConnectionHelper = httpConnectionHelper;
111         this.uploadPolicy = uploadPolicy;
112         this.cookies = new CookieJar(uploadPolicy);
113     }
114 
115     /**
116      * Return the last read http response (200, in case of success).
117      * 
118      * @return The last read http response
119      */
120     public int gethttpStatusCode() {
121         return this.httpStatusCode;
122     }
123 
124     /**
125      * Get the last response bytearrayResponseBody.
126      * 
127      * @return The last read response bytearrayResponseBody.
128      */
129     public String getResponseBody() {
130         return this.stringResponseBody;
131     }
132 
133     /**
134      * Get the last response bytearrayResponseBody. This byte array should be
135      * decoded by using the {@link #getResponseCharset()}.
136      * 
137      * @return The last read response bytearrayResponseBody.
138      * 
139      */
140     public byte[] getResponseBodyAsByteArray() {
141         return this.bytearrayResponseBody;
142     }
143 
144     /**
145      * Get the charset that should be used to decode the last response, when
146      * using the {@link #getResponseBodyAsByteArray()} method.
147      * 
148      * @return The last read response bytearrayResponseBody.
149      */
150     public String getResponseCharset() {
151         return this.charset;
152     }
153 
154     /**
155      * Get the headers of the HTTP response.
156      * 
157      * @return The HTTP headers.
158      */
159     public String getResponseHeaders() {
160         return this.responseHeaders;
161     }
162 
163     /**
164      * Get the last response message.
165      * 
166      * @return The response message from the first line of the response (e.g.
167      *         "200 OK").
168      */
169     public String getResponseMsg() {
170         return this.responseMsg;
171     }
172 
173     /**
174      * The main method: reads the response in the input stream.
175      * 
176      * @return The response status (e.g.: 200 if everything was ok)
177      * @throws JUploadException
178      */
179     public int readHttpResponse() throws JUploadException {
180         PushbackInputStream httpDataIn = this.httpConnectionHelper
181                 .getInputStream();
182 
183         try {
184             // We first read the headers,
185             readHeaders(httpDataIn);
186 
187             // then the bytearrayResponseBody.
188             // If we're in a HEAD request ... we're not interested in the
189             // bytearrayResponseBody!
190             if (this.httpConnectionHelper.getMethod().equals("HEAD")) {
191                 this.uploadPolicy
192                         .displayDebug(
193                                 "This is a HEAD request: we don't care about the bytearrayResponseBody",
194                                 70);
195                 this.stringResponseBody = "";
196             } else {
197                 readBody(httpDataIn);
198             }
199         } catch (JUploadException e) {
200             throw e;
201         } catch (Exception e) {
202             throw new JUploadException(e);
203         }
204 
205         return this.httpStatusCode;
206     }
207 
208     // //////////////////////////////////////////////////////////////////////////////////////
209     // //////////////////// Various utilities
210     // //////////////////////////////////////////////////////////////////////////////////////
211 
212     /**
213      * Concatenates two byte arrays.
214      * 
215      * @param buf1 The first array
216      * @param buf2 The second array
217      * @return A byte array, containing buf2 appended to buf2
218      */
219     static byte[] byteAppend(byte[] buf1, byte[] buf2) {
220         byte[] ret = new byte[buf1.length + buf2.length];
221         System.arraycopy(buf1, 0, ret, 0, buf1.length);
222         System.arraycopy(buf2, 0, ret, buf1.length, buf2.length);
223         return ret;
224     }
225 
226     /**
227      * Concatenates two byte arrays.
228      * 
229      * @param buf1 The first array
230      * @param buf2 The second array
231      * @param len Number of bytes to copy from buf2
232      * @return A byte array, containing buf2 appended to buf2
233      */
234     static byte[] byteAppend(byte[] buf1, byte[] buf2, int len) {
235         if (len > buf2.length)
236             len = buf2.length;
237         byte[] ret = new byte[buf1.length + len];
238         System.arraycopy(buf1, 0, ret, 0, buf1.length);
239         System.arraycopy(buf2, 0, ret, buf1.length, len);
240         return ret;
241     }
242 
243     /**
244      * Similar like BufferedInputStream#readLine() but operates on raw bytes.
245      * Line-Ending is <b>always</b> "\r\n".
246      * 
247      * @param inputStream
248      * 
249      * @param charset The input charset of the stream.
250      * @param includeCR Set to true, if the terminating CR/LF should be included
251      *            in the returned byte array.
252      * @return The line, encoded from the input stream with the given charset
253      * @throws IOException
254      * @throws JUploadException
255      */
256     public static String readLine(PushbackInputStream inputStream,
257             String charset, boolean includeCR) throws IOException,
258             JUploadException {
259         byte[] line = readLine(inputStream, includeCR);
260         return (null == line) ? null : new String(line, charset);
261     }
262 
263     /**
264      * Similar like BufferedInputStream#readLine() but operates on raw bytes.
265      * According to RFC 2616, and of line may be CR (13), LF (10) or CRLF.
266      * Line-Ending is <b>always</b> "\r\n" in header, but not in text bodies.
267      * Update done by TedA (sourceforge account: tedaaa). Allows to manage
268      * response from web server that send LF instead of CRLF ! Here is a part of
269      * the RFC: <I>"we recommend that applications, when parsing such headers,
270      * recognize a single LF as a line terminator and ignore the leading
271      * CR"</I>. <BR>
272      * Corrected again to manage line finished by CR only. This is not allowed
273      * in headers, but this method is also used to read lines in the
274      * bytearrayResponseBody.
275      * 
276      * @param inputStream
277      * 
278      * @param includeCR Set to true, if the terminating CR/LF should be included
279      *            in the returned byte array. In this case, CR/LF is always
280      *            returned to the caller, whether the input stream got CR, LF or
281      *            CRLF.
282      * @return The byte array from the input stream, with or without a trailing
283      *         CRLF
284      * @throws IOException
285      * @throws JUploadException
286      */
287     public static byte[] readLine(PushbackInputStream inputStream,
288             boolean includeCR) throws IOException, JUploadException {
289         final byte EOS = -1;
290         final byte CR = 13;
291         final byte LF = 10;
292         int len = 0;
293         int buflen = 128; // average line length
294         byte[] buf = new byte[buflen];
295         byte[] ret = null;
296         int b;
297         boolean lineRead = false;
298 
299         while (!lineRead) {
300             try {
301                 b = inputStream.read();
302             } catch (IOException ioe) {
303                 throw new JUploadIOException(ioe.getClass().getName() + ": "
304                         + ioe.getMessage()
305                         + " (while reading server response )", ioe);
306             } catch (Exception e) {
307                 throw new JUploadException(e.getClass().getName() + ": "
308                         + e.getMessage() + " (while reading server response )",
309                         e);
310             }
311             switch (b) {
312                 case EOS:
313                     // We've finished reading the stream, and so the line is
314                     // finished too.
315                     if (len == 0) {
316                         return null;
317                     }
318                     lineRead = true;
319                     break;
320                 /*
321                  * if (len > 0) { ret = new byte[len]; System.arraycopy(buf, 0,
322                  * ret, 0, len); return ret; } return null;
323                  */
324                 case LF:
325                     // We found the end of the current line.
326                     lineRead = true;
327                     break;
328                 case CR:
329                     // We got a CR. It can be the end of line.
330                     // Is it followed by a LF ? (not mandatory in RFC 2616)
331                     b = inputStream.read();
332 
333                     if (b != LF) {
334                         // The end of line was a simple LF: the next one blongs
335                         // to the next line.
336                         inputStream.unread(b);
337                     }
338                     lineRead = true;
339                     break;
340                 default:
341                     buf[len++] = (byte) b;
342                     // If the buffer is too small, we let enough space to add CR
343                     // and LF, in case of ...
344                     if (len + 2 >= buflen) {
345                         buflen *= 2;
346                         byte[] tmp = new byte[buflen];
347                         System.arraycopy(buf, 0, tmp, 0, len);
348                         buf = tmp;
349                     }
350             }
351         } // while
352 
353         // Let's go back to before any CR and LF.
354         while (len > 0 && (buf[len] == CR || buf[len] == LF)) {
355             len -= 1;
356         }
357 
358         // Ok, now len indicates the end of the actual line.
359         // Should we add a proper CRLF, or nothing ?
360         if (includeCR) {
361             // We have enough space to add these two characters (see the default
362             // here above)
363             buf[len++] = CR;
364             buf[len++] = LF;
365         }
366 
367         if (len > 0) {
368             ret = new byte[len];
369             if (len > 0)
370                 System.arraycopy(buf, 0, ret, 0, len);
371         } else {
372             // line feed for empty line between headers and
373             // bytearrayResponseBody, or within the
374             // bytearrayResponseBody.
375             ret = new byte[0];
376         }
377         return ret;
378     }
379 
380     /**
381      * Read the headers from the given input stream.
382      * 
383      * @param httpDataIn The http input stream
384      * @throws IOException
385      * @throws JUploadException
386      */
387     private void readHeaders(PushbackInputStream httpDataIn)
388             throws IOException, JUploadException {
389         StringBuffer sbHeaders = new StringBuffer();
390         // Headers are US-ASCII (See RFC 2616, Section 2.2)
391         String tmp;
392         // We must be reading the first line of the HTTP header.
393         this.uploadPolicy.displayDebug(
394                 "-------- Response Headers Start --------", 80);
395 
396         do {
397             tmp = readLine(httpDataIn, "US-ASCII", false);
398             if (null == tmp) {
399                 throw new JUploadEOFException(this.uploadPolicy,
400                         "reading headers");
401             }
402             if (this.httpStatusCode == 0) {
403                 Matcher m = pHttpStatus.matcher(tmp);
404                 if (m.matches()) {
405                     this.httpStatusCode = Integer.parseInt(m.group(2));
406                     this.responseMsg = m.group(1);
407                 } else {
408                     // The status line must be the first line of the
409                     // response. (See RFC 2616, Section 6.1) so this
410                     // is an error.
411 
412                     // We first display the wrong line.
413                     this.uploadPolicy.displayDebug("First line of response: '"
414                             + tmp + "'", 80);
415                     // Then, we throw the exception.
416                     throw new JUploadException(
417                             "HTTP response did not begin with status line.");
418                 }
419             }
420             // Handle folded headers (RFC 2616, Section 2.2). This is
421             // handled after the status line, because that line may
422             // not be folded (RFC 2616, Section 6.1).
423             if (tmp.startsWith(" ") || tmp.startsWith("\t"))
424                 this.line += " " + tmp.trim();
425             else
426                 this.line = tmp;
427 
428             // The read line is now correctly formatted.
429             this.uploadPolicy.displayDebug(this.line, 80);
430             sbHeaders.append(tmp).append("\n");
431 
432             if (pClose.matcher(this.line).matches())
433                 this.gotClose = true;
434             if (pProxyClose.matcher(this.line).matches())
435                 this.gotClose = true;
436             if (pChunked.matcher(this.line).matches())
437                 this.gotChunked = true;
438             Matcher m = pContentLen.matcher(this.line);
439             if (m.matches()) {
440                 this.gotContentLength = true;
441                 this.clen = Integer.parseInt(m.group(1));
442             }
443             m = pContentTypeCs.matcher(this.line);
444             if (m.matches())
445                 this.charset = m.group(1);
446             m = pSetCookie.matcher(this.line);
447             if (m.matches()) {
448                 this.uploadPolicy.displayDebug(
449                         "Calling this.cookies.parseCookieHeader, with parameter: "
450                                 + m.group(1), 80);
451                 this.cookies.parseCookieHeader(m.group(1));
452                 this.uploadPolicy.displayDebug("Cookie header parsed.", 80);
453             }
454             // RFC 2616, Section 6. Body is separated by the header with an
455             // empty line: so end of headers is an empty line.
456         } while (this.line.length() > 0);
457 
458         this.responseHeaders = sbHeaders.toString();
459         this.uploadPolicy.displayDebug(
460                 "--------- Response Headers End ---------", 80);
461     }// readHeaders()
462 
463     /**
464      * Read the bytearrayResponseBody from the given input stream.
465      * 
466      * @param httpDataIn The http input stream
467      * @throws IOException
468      * @throws JUploadException
469      * @throws JUploadException
470      */
471     private void readBody(PushbackInputStream httpDataIn) throws IOException,
472             JUploadException {
473         // && is evaluated from left to right so !stop must come first!
474         while ((!this.gotContentLength) || (this.clen > 0)) {
475             if (this.gotChunked) {
476                 // Read the chunk header.
477                 // This is US-ASCII! (See RFC 2616, Section 2.2)
478                 this.line = readLine(httpDataIn, "US-ASCII", false);
479                 if (null == this.line)
480                     throw new JUploadEOFException(this.uploadPolicy,
481                             "reading HTTP Body, HTTP chunked mode (1)");
482                 // Handle a single chunk of the response
483                 // We cut off possible chunk extensions and ignore them.
484                 // The length is hex-encoded (RFC 2616, Section 3.6.1)
485                 int len = Integer.parseInt(this.line.replaceFirst(";.*", "")
486                         .trim(), 16);
487                 this.uploadPolicy.displayDebug("Chunk: " + this.line + " dec: "
488                         + len, 70);
489                 if (len == 0) {
490                     // RFC 2616, Section 3.6.1: A length of 0 denotes
491                     // the last chunk of the bytearrayResponseBody.
492 
493                     // This code wrong if the server sends chunks with trailers!
494                     // (trailers are HTTP Headers that are send *after* the
495                     // bytearrayResponseBody. These are announced
496                     // in the regular HTTP header "Trailer".
497                     // Fritz: Never seen them so far ...
498                     // TODO: Implement trailer-handling.
499                     break;
500                 }
501 
502                 // Loop over the chunk (len == length of the chunk)
503                 while (len > 0) {
504                     int rlen = (len > CHUNKBUF_SIZE) ? CHUNKBUF_SIZE : len;
505                     int ofs = 0;
506                     if (rlen > 0) {
507                         while (ofs < rlen) {
508                             int res = httpDataIn.read(this.chunkbuf, ofs, rlen
509                                     - ofs);
510                             if (res < 0)
511                                 throw new JUploadEOFException(
512                                         this.uploadPolicy,
513                                         "reading body, HTTP chunk mode (2)");
514                             len -= res;
515                             ofs += res;
516                         }
517                         if (ofs < rlen)
518                             throw new JUploadException("short read");
519                         if (rlen < CHUNKBUF_SIZE)
520                             this.bytearrayResponseBody = byteAppend(
521                                     this.bytearrayResponseBody, this.chunkbuf,
522                                     rlen);
523                         else
524                             this.bytearrayResponseBody = byteAppend(
525                                     this.bytearrayResponseBody, this.chunkbuf);
526                     }
527                 }
528                 // Got the whole chunk, read the trailing CRLF.
529                 readLine(httpDataIn, false);
530             } else {
531                 // Not chunked. Use either content-length (if available)
532                 // or read until EOF.
533                 if (this.gotContentLength) {
534                     // Got a Content-Length. Read exactly that amount of
535                     // bytes.
536                     while (this.clen > 0) {
537                         int rlen = (this.clen > CHUNKBUF_SIZE) ? CHUNKBUF_SIZE
538                                 : this.clen;
539                         int ofs = 0;
540                         if (rlen > 0) {
541                             while (ofs < rlen) {
542                                 int res = httpDataIn.read(this.chunkbuf, ofs,
543                                         rlen - ofs);
544                                 if (res < 0)
545                                     throw new JUploadEOFException(
546                                             this.uploadPolicy,
547                                             "reading HTTP bytearrayResponseBody, not chunked mode");
548                                 this.clen -= res;
549                                 ofs += res;
550                             }
551                             if (ofs < rlen)
552                                 throw new JUploadException("short read");
553                             if (rlen < CHUNKBUF_SIZE)
554                                 this.bytearrayResponseBody = byteAppend(
555                                         this.bytearrayResponseBody,
556                                         this.chunkbuf, rlen);
557                             else
558                                 this.bytearrayResponseBody = byteAppend(
559                                         this.bytearrayResponseBody,
560                                         this.chunkbuf);
561                         }
562                     }
563                 } else {
564                     // No Content-length available, read until EOF
565                     //
566                     while (true) {
567                         byte[] lbuf = readLine(httpDataIn, true);
568                         if (null == lbuf)
569                             break;
570                         this.bytearrayResponseBody = byteAppend(
571                                 this.bytearrayResponseBody, lbuf);
572                     }
573                     break;
574                 }
575             }
576         } // while
577 
578         // Convert the whole bytearrayResponseBody according to the charset.
579         // The default for charset ISO-8859-1, but overridden by
580         // the charset attribute of the Content-Type header (if any).
581         // See RFC 2616, Sections 3.4.1 and 3.7.1.
582         this.stringResponseBody = new String(this.bytearrayResponseBody,
583                 this.charset);
584 
585         // At the higher debug level, we display the response.
586         this.uploadPolicy.displayDebug("-------- Response Body Start --------",
587                 99);
588         this.uploadPolicy.displayDebug(this.stringResponseBody, 99);
589         this.uploadPolicy.displayDebug("-------- Response Body End --------",
590                 99);
591     }// readBody
592 }