當想獲得某個網站上的網頁資訊時,除非該網站有提供相對應的API,否則最直
接的方法就是擷取其HTML原始碼,再透過HTML原始碼來獲取我們想擷取的資
訊。而以下的Function就是用來獲取某個網頁的HTML原始碼。
import java.io.IOException;
以下為自己練習寫的Android程式,透過上面的方法獲取Proxy Server List,並且列出來。
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Downloads the resource at the given address with an HTTP GET and returns
 * its body as text.
 *
 * <p>The whole response body is read into memory and decoded as UTF-8.
 * Failures are not propagated to the caller: the stack trace is printed and
 * the sentinel string {@code "Undefined"} is returned instead.
 *
 * @param URLString absolute URL of the page to fetch (http or https)
 * @return the response body decoded as UTF-8, or {@code "Undefined"} when the
 *         URL is malformed, is not an HTTP(S) URL, or an I/O error occurs
 */
private String GetHTML(String URLString)
{
    HttpURLConnection urlConnection = null;
    InputStream htmlbody = null;
    try
    {
        // URLString is the address of the page.
        // To go through a proxy server, open the connection with
        // url.openConnection(new java.net.Proxy(java.net.Proxy.Type.HTTP,
        //         new java.net.InetSocketAddress(proxyip, proxyport)))
        URL url = new URL(URLString);

        // Only HTTP(S) connections can be cast below; anything else (file:,
        // ftp:, ...) is reported through the same sentinel as other failures.
        java.net.URLConnection rawConnection = url.openConnection();
        if (!(rawConnection instanceof HttpURLConnection))
        {
            return "Undefined";
        }
        urlConnection = (HttpURLConnection) rawConnection;

        urlConnection.setRequestMethod("GET");
        // setDoOutput(true) is intentionally NOT called: it announces a
        // request body and makes some implementations send POST instead.
        urlConnection.setDoInput(true);
        urlConnection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");
        // NOTE(review): this is a request header on a body-less GET and names
        // big5 while the response is decoded as UTF-8 below - confirm intent.
        urlConnection.setRequestProperty("Content-type", "text/html; charset= big5");
        urlConnection.connect();

        htmlbody = urlConnection.getInputStream();
        if (htmlbody == null)
        {
            return "Undefined";
        }

        // Growable buffer avoids re-copying everything read so far per chunk.
        java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int count;
        while ((count = htmlbody.read(chunk)) > 0)
        {
            collected.write(chunk, 0, count);
        }
        return collected.toString("UTF-8"); // the HTML source
    }
    catch (IOException e)
    {
        // Also covers MalformedURLException and UnsupportedEncodingException,
        // which are both IOException subclasses.
        e.printStackTrace();
        return "Undefined";
    }
    finally
    {
        if (htmlbody != null)
        {
            try
            {
                htmlbody.close();
            }
            catch (IOException ignored)
            {
                // Nothing useful can be done if closing fails; the result
                // (or the failure sentinel) has already been decided.
            }
        }
        if (urlConnection != null)
        {
            urlConnection.disconnect();
        }
    }
}



沒有留言:
張貼留言