|
Enterprise Information Portal APIs |
||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object | +--java.net.URLConnection | +--com.ibm.gcs.netutil.GCSHttpConnection
GCSHttpConnection is a customization of the
java.net.HttpURLConnection
class.
A GCSHttpConnection object is returned by the openConnection method of the
GCSHttpStreamHandler class for the "http" protocol.
java.net.HttpURLConnection
,
GCSHttpStreamHandler
Field Summary | |
static java.lang.String |
AUTHORIZATION
|
static boolean |
cookieDBOn
|
static java.lang.String |
CRLF
|
static int |
DEFAULT_ROBOTS_CACHE_SIZE
|
static java.lang.String |
HTTP_CONNECT
|
static java.lang.String |
HTTP_DELETE
|
static java.lang.String |
HTTP_GET
|
static java.lang.String |
HTTP_HEAD
|
static java.lang.String |
HTTP_OPTIONS
|
static java.lang.String |
HTTP_POST
|
static java.lang.String |
HTTP_PUT
|
static java.lang.String |
HTTP_TRACE
|
static java.lang.String |
HTTP_VERSION
|
static java.lang.String |
IF_MODIFIED_SINCE
|
static java.lang.String |
LAST_MODIFIED
|
static boolean |
pdfToHtmlConversionOn
|
static java.util.Map |
robotsLocksTable
|
static java.lang.String |
SET_COOKIE
|
static java.lang.String |
SPC
|
static java.lang.String[] |
supportedRequestMethods
|
Constructor Summary | |
GCSHttpConnection(java.net.URL u)
(constructor) |
Method Summary | |
boolean |
checkIfAllowedToCrawl(java.net.URL u)
|
void |
connect()
Opens a connection to the URL if not already connected. |
void |
disconnect()
Disconnect a previously established connection with the http server. |
java.lang.Object |
getContent()
Retrieves the content of this URL connection. |
java.lang.String |
getContentEncoding()
Returns the content encoding, or null if not found |
int |
getContentLength()
Get the length of the content (length of the content header field). |
java.lang.String |
getContentType()
Gets the content type of the resource. |
java.lang.String |
getDateModified()
Returns the date last modified String from HTTP header, or null if not found |
java.lang.String |
getHeaderField(java.lang.String fieldName)
Gets a field value based on the key in the headers that are sent back from the server in response to a connection request. |
java.util.Hashtable |
getHeaders()
Gets the headers that are sent back from the server in response to a connection request. |
java.io.InputStream |
getInputStream()
Gets an input stream that reads from this open connection. Overrides the superclass's getInputStream method. |
java.lang.String |
getOutContent()
Returns the current outcontent |
java.lang.String |
getRequestMethod()
Returns the current transaction method |
java.lang.String |
getRequestProperty(java.lang.String key)
Description copied from URLConnection Returns the value of the named general request property for this connection. |
int |
getResponseCode()
Gets the response code or the status of a connection request. |
java.lang.String |
getResponseMessage()
Gets the response message of a connection request. Response messages are strings such as "OK" or "Not Found", extracted from response status lines like "HTTP/1.0 200 OK" or "HTTP/1.0 404 Not Found". |
RobotsProcessor |
getRobotsProcessor()
Get the RobotsProcessor object (if already set up) for this connection |
static java.lang.String |
guessContentTypeFromStream(java.io.InputStream is)
Guesses the content type from the stream. This is helpful in identifying "xml"s and "dtd"s which are not sent as the right streams. Overrides the base class method to figure out the contents in a better way. |
boolean |
outContentIsEmpty()
Tells if the outContent is empty |
boolean |
robotsAllowed()
checks if robots are allowed. |
void |
setAuthorization(java.lang.String username,
char[] password)
If not connected, sets the authorization header according to the basic-authentication scheme as per RFC 2617. |
void |
setIfModifiedSince(long ifmodifiedsince)
Calls super, then sets the value in the request header. |
void |
setOutContent(java.lang.String outContent)
This sets the content sent during a transaction. |
static void |
setProxy(java.lang.String _proxyHost,
int _proxyPort)
set proxy info for all HTTP connections |
void |
setRequestMethod(java.lang.String method)
Sets the method that will be used for the HTTP transaction. |
void |
setRequestProperty(java.lang.String key,
java.lang.String value)
Description copied from URLConnection. |
Methods inherited from class java.net.URLConnection |
getAllowUserInteraction, getContent, getDate, getDefaultAllowUserInteraction, getDefaultRequestProperty, getDefaultUseCaches, getDoInput, getDoOutput, getExpiration, getFileNameMap, getHeaderField, getHeaderFieldDate, getHeaderFieldInt, getHeaderFieldKey, getIfModifiedSince, getLastModified, getOutputStream, getPermission, getURL, getUseCaches, setAllowUserInteraction, setContentHandlerFactory, setDefaultAllowUserInteraction, setDefaultRequestProperty, setDefaultUseCaches, setDoInput, setDoOutput, setFileNameMap, setUseCaches, toString |
Methods inherited from class java.lang.Object |
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
public static boolean cookieDBOn
public static boolean pdfToHtmlConversionOn
public static final java.lang.String HTTP_VERSION
public static final java.lang.String HTTP_GET
public static final java.lang.String HTTP_POST
public static final java.lang.String HTTP_HEAD
public static final java.lang.String HTTP_PUT
public static final java.lang.String HTTP_DELETE
public static final java.lang.String HTTP_TRACE
public static final java.lang.String HTTP_OPTIONS
public static final java.lang.String HTTP_CONNECT
public static final java.lang.String[] supportedRequestMethods
public static final java.lang.String CRLF
public static final java.lang.String SPC
public static final java.lang.String SET_COOKIE
public static final java.lang.String IF_MODIFIED_SINCE
public static final java.lang.String AUTHORIZATION
public static final java.lang.String LAST_MODIFIED
public static final int DEFAULT_ROBOTS_CACHE_SIZE
public static java.util.Map robotsLocksTable
Constructor Detail |
public GCSHttpConnection(java.net.URL u) throws java.io.IOException
u
- URL object for which a connection object is created
Method Detail |
public java.lang.String getDateModified()
public java.lang.String getContentEncoding()
getContentEncoding
in class java.net.URLConnection
public static void setProxy(java.lang.String _proxyHost, int _proxyPort)
public java.io.InputStream getInputStream() throws java.io.IOException
getInputStream
in class java.net.URLConnection
java.io.IOException
- when a file I/O exception happens
URLConnection.getInputStream()
public int getContentLength()
getContentLength
in class java.net.URLConnection
private URL getURLAfterConversion(URL urlBefore) throws IOException {
public void connect() throws java.io.IOException
connected
field to true.
connect
in class java.net.URLConnection
URLConnection.connect()
,
java.net.URLConnection#connected
public int getResponseCode()
public java.lang.String getResponseMessage()
public java.util.Hashtable getHeaders()
com.ibm.almaden.gcs.gcsurl.GCSHttpConnection#getHeaderField(String fieldName)
,
HTTP RFC for headers
public java.lang.String getHeaderField(java.lang.String fieldName)
getHeaderField
in class java.net.URLConnection
fieldName
- the attribute name in the header whose value is to be obtained
fieldName
com.ibm.almaden.gcs.gcsurl.GCSHttpConnection#getHeaders()
,
HTTP RFC for headers
public void disconnect()
public java.lang.String getContentType()
getContentType
in class java.net.URLConnection
public java.lang.Object getContent() throws java.io.IOException
This method determines if robots are allowed to crawl the object.
getContent
in class java.net.URLConnection
java.io.IOException
- if an I/O error occurs while getting the content
URLConnection.getContent()
public void setIfModifiedSince(long ifmodifiedsince)
setIfModifiedSince
in class java.net.URLConnection
ifmodifiedsince
- the new value.
URLConnection.getIfModifiedSince()
public void setRequestProperty(java.lang.String key, java.lang.String value)
HTTP requires all request properties which can legally have multiple instances with the same key to use a comma-separated list syntax, which enables multiple properties to be appended into a single property. Stores values in a hashmap.
setRequestProperty
in class java.net.URLConnection
key
- the keyword by which the request is known
(e.g., "accept").
value
- the value associated with it.
getRequestProperty(java.lang.String)
public java.lang.String getRequestProperty(java.lang.String key)
getRequestProperty
in class java.net.URLConnection
key
- the keyword by which the request is known (e.g., "accept").
setRequestProperty(java.lang.String, java.lang.String)
public void setAuthorization(java.lang.String username, char[] password)
public static java.lang.String guessContentTypeFromStream(java.io.InputStream is) throws java.io.IOException
public void setRequestMethod(java.lang.String method) throws java.net.ProtocolException
public java.lang.String getRequestMethod()
public void setOutContent(java.lang.String outContent)
public java.lang.String getOutContent()
public boolean outContentIsEmpty()
public boolean robotsAllowed()
public RobotsProcessor getRobotsProcessor()
public boolean checkIfAllowedToCrawl(java.net.URL u)
|
EIP Web Crawler APIs | ||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |