求java获取网页源代码
发布网友
发布时间:2022-06-20 16:24
我来回答
共2个回答
热心网友
时间:2024-11-20 12:19
其实你没理解HTTP协议:你能获取到HTML源码,就表明HTTP请求是成功的,状态码是200;状态码500则表示网站内部出错了,你也没办法。
给你一段我写的代码,应该可以正常获取网页内容:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Small helper for downloading the body of a web page as text over HTTP(S).
 */
public class WebClient {

    /**
     * Downloads the page at {@code urlString} and returns its body as text.
     *
     * <p>If {@code urlString} does not start with {@code http://} or
     * {@code https://} (compared case-insensitively), {@code http://} is
     * prepended. Each line of the response is terminated with {@code "\r\n"}
     * in the returned string, regardless of the line endings the server sent.
     *
     * @param urlString the address to fetch; a scheme is optional
     * @param charset   name of the charset used to decode the response bytes
     * @param timeout   connect and read timeout in milliseconds (0 means no timeout)
     * @return the decoded body, or {@code null} when {@code urlString} is
     *         {@code null} or empty, when the response status is not 200, or
     *         when reading the response status fails with an I/O error
     * @throws IOException if the URL is malformed, the charset is unsupported,
     *                     or reading the body fails
     * @throws IllegalArgumentException if {@code timeout} is negative
     */
    public static String getWebContent(String urlString, final String charset,
            int timeout) throws IOException {
        if (urlString == null || urlString.length() == 0) {
            return null;
        }
        // Default to plain HTTP when the caller left the scheme out.
        if (!hasHttpScheme(urlString)) {
            urlString = "http://" + urlString;
        }
        URL url = new URL(urlString);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestProperty(
                    "User-Agent",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
            conn.setRequestProperty("Accept", "text/html");
            conn.setConnectTimeout(timeout);
            // Without a read timeout a stalled server would block this call forever.
            conn.setReadTimeout(timeout);
            try {
                if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                    return null;
                }
            } catch (IOException e) {
                // Best effort: a failed request is reported as "no content".
                e.printStackTrace();
                return null;
            }
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), charset));
            try {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    sb.append(line).append("\r\n");
                }
                return sb.toString();
            } finally {
                reader.close();
            }
        } finally {
            // Release the connection on every exit path, including early returns.
            conn.disconnect();
        }
    }

    /**
     * Downloads the page at {@code urlString}, decoding it as ISO-8859-1 with
     * a 5000 ms timeout.
     *
     * @param urlString the address to fetch; a scheme is optional
     * @return the decoded body, or {@code null} under the same conditions as
     *         {@link #getWebContent(String, String, int)}
     * @throws IOException if the URL is malformed or reading the body fails
     */
    public static String getWebContent(String urlString) throws IOException {
        return getWebContent(urlString, "iso-8859-1", 5000);
    }

    /** Returns whether {@code s} begins with an http or https scheme, ignoring case. */
    private static boolean hasHttpScheme(String s) {
        return s.regionMatches(true, 0, "http://", 0, 7)
                || s.regionMatches(true, 0, "https://", 0, 8);
    }

    /**
     * Demo entry point: fetches a fixed page and prints it after re-decoding
     * the ISO-8859-1 text as GB2312.
     */
    public static void main(String[] args) throws IOException {
        String s = getWebContent("http://www.baidu.com");
        if (s == null) {
            // getWebContent signals failure with null; avoid dereferencing it.
            System.err.println("Failed to fetch page content");
            return;
        }
        // ISO-8859-1 maps bytes 1:1 to chars, so the original bytes can be recovered here.
        s = new String(s.getBytes("iso-8859-1"), "gb2312");
        System.out.println(s);
    }
}
=====
获取conn.getResponseCode()是500的状态码不就行了吗
热心网友
时间:2024-11-20 12:14
服务器端的源代码你应该获取不到吧……你获取到的是服务器处理之后发过来的代码。
热心网友
时间:2024-11-20 12:14
其实你没理解HTTP协议:你能获取到HTML源码,就表明HTTP请求是成功的,状态码是200;状态码500则表示网站内部出错了,你也没办法。
给你一段我写的代码,应该可以正常获取网页内容:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Small helper for downloading the body of a web page as text over HTTP(S).
 */
public class WebClient {

    /**
     * Downloads the page at {@code urlString} and returns its body as text.
     *
     * <p>If {@code urlString} does not start with {@code http://} or
     * {@code https://} (compared case-insensitively), {@code http://} is
     * prepended. Each line of the response is terminated with {@code "\r\n"}
     * in the returned string, regardless of the line endings the server sent.
     *
     * @param urlString the address to fetch; a scheme is optional
     * @param charset   name of the charset used to decode the response bytes
     * @param timeout   connect and read timeout in milliseconds (0 means no timeout)
     * @return the decoded body, or {@code null} when {@code urlString} is
     *         {@code null} or empty, when the response status is not 200, or
     *         when reading the response status fails with an I/O error
     * @throws IOException if the URL is malformed, the charset is unsupported,
     *                     or reading the body fails
     * @throws IllegalArgumentException if {@code timeout} is negative
     */
    public static String getWebContent(String urlString, final String charset,
            int timeout) throws IOException {
        if (urlString == null || urlString.length() == 0) {
            return null;
        }
        // Default to plain HTTP when the caller left the scheme out.
        if (!hasHttpScheme(urlString)) {
            urlString = "http://" + urlString;
        }
        URL url = new URL(urlString);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestProperty(
                    "User-Agent",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
            conn.setRequestProperty("Accept", "text/html");
            conn.setConnectTimeout(timeout);
            // Without a read timeout a stalled server would block this call forever.
            conn.setReadTimeout(timeout);
            try {
                if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                    return null;
                }
            } catch (IOException e) {
                // Best effort: a failed request is reported as "no content".
                e.printStackTrace();
                return null;
            }
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), charset));
            try {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    sb.append(line).append("\r\n");
                }
                return sb.toString();
            } finally {
                reader.close();
            }
        } finally {
            // Release the connection on every exit path, including early returns.
            conn.disconnect();
        }
    }

    /**
     * Downloads the page at {@code urlString}, decoding it as ISO-8859-1 with
     * a 5000 ms timeout.
     *
     * @param urlString the address to fetch; a scheme is optional
     * @return the decoded body, or {@code null} under the same conditions as
     *         {@link #getWebContent(String, String, int)}
     * @throws IOException if the URL is malformed or reading the body fails
     */
    public static String getWebContent(String urlString) throws IOException {
        return getWebContent(urlString, "iso-8859-1", 5000);
    }

    /** Returns whether {@code s} begins with an http or https scheme, ignoring case. */
    private static boolean hasHttpScheme(String s) {
        return s.regionMatches(true, 0, "http://", 0, 7)
                || s.regionMatches(true, 0, "https://", 0, 8);
    }

    /**
     * Demo entry point: fetches a fixed page and prints it after re-decoding
     * the ISO-8859-1 text as GB2312.
     */
    public static void main(String[] args) throws IOException {
        String s = getWebContent("http://www.baidu.com");
        if (s == null) {
            // getWebContent signals failure with null; avoid dereferencing it.
            System.err.println("Failed to fetch page content");
            return;
        }
        // ISO-8859-1 maps bytes 1:1 to chars, so the original bytes can be recovered here.
        s = new String(s.getBytes("iso-8859-1"), "gb2312");
        System.out.println(s);
    }
}
=====
获取conn.getResponseCode()是500的状态码不就行了吗
热心网友
时间:2024-11-20 12:11
服务器端的源代码你应该获取不到吧……你获取到的是服务器处理之后发过来的代码。