一个通过httpclient抓取火车票信息的程序,需要修改下才能跑通,需要自己封装下httpclient,然后用get方式调用,还有fastJson,需要自己去解析下获得的数据,catchTrainInfo()是入口方法 import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; import javax.annotation.Resource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Set; public class CatchTrainInfo { /** * 抓取火车票信息 * @param attachmentId * @return */ @SuppressWarnings("unchecked") public String catchTrainInfo() { //车站信息 HashSet<String> station = new HashSet<String>(); //临时车站信息 HashSet<String> stationTmp = new HashSet<String>(); //车次信息 Map<String,String> train = new HashMap<String,String>(); String stationHtml = HttpClientUtil.get("http://www.59178.com/zhan/"); String stationName = ""; //截取车站信息 String stations[] = stationHtml.split("htm'>"); for (int i = 0; i < stations.length; i++) { if (i == 0 ) { continue; } stationName = getStation(stations[i]); if("".equals(stationName)) { continue; } station.add(stationName); } //循环,根据车站信息获取车次信息 getTrainsByStation(station,train); //循环,根据车次获取车次详情,并保存入库 getTrainDetailsByTrains(stationTmp,train); //继续执行3次循环,(本来应该stationTmp.size()==0的时候,防止无限循环) int loop = 3; while (loop > 0) { //stationTmp 和 station比较,stationTmp去掉已经遍历过的站点,station用stationTmp替代,继续循环 HashSet<String> stationTmp1 = (HashSet<String>) stationTmp.clone(); for (String stationTmpElement : stationTmp) { for (String stationElement : station) { if(stationTmpElement.equals(stationElement)) { stationTmp.remove(stationTmpElement); } } } station = stationTmp1; //循环,根据车站信息获取车次信息 getTrainsByStation(stationTmp,train); //循环,根据车次获取车次详情,并写入数据库 getTrainDetailsByTrains(stationTmp,train); loop--; } return "success!"; } /** * 根据车次获取车次详情,并写入数据库 * @param stationTmp * @param train */ private void getTrainDetailsByTrains(HashSet<String> stationTmp, Map<String, String> train) { Iterator<Entry<String, String>> iterator = train.entrySet().iterator(); while (iterator.hasNext()) { Entry<String, String> entry = iterator.next(); String key = entry.getKey(); String value = entry.getValue(); if("unused".equals(value)) { getAndSaveTrainsDetails(key,stationTmp); entry.setValue("used"); } try { //休眠一会,防止反爬虫 Thread.sleep(100); } catch (InterruptedException e) { e.printStackTrace(); } } } /** * 根据车站信息获取车次信息 * @param station * @param train */ private void getTrainsByStation(HashSet<String> station, Map<String, String> train) { Iterator<String> iterator = station.iterator(); while (iterator.hasNext()) { String key = iterator.next(); getTrainsInfo(train,key); try { //休眠一会,防止反爬虫 Thread.sleep(100); } catch (InterruptedException e) { e.printStackTrace(); } } } /** * 根据车站信息得到车次信息 * @param train * @param stationName * @return */ private void getTrainsInfo(Map<String, String> train, String stationName) { try { stationName = URLEncoder.encode(stationName,"utf-8"); } catch (UnsupportedEncodingException e1) { log.error("转码出错了!stationName:" + stationName); } String url = "http://train.qunar.com/qunar/stationInfo.jsp?q=" + stationName + "&format=json"; //根据车站信息获取车次 ticketInfo String ticketInfo = HttpClientUtil.get(url); try{ @SuppressWarnings("unchecked") java.util.Map<String, Object> ticketInfos = (Map<String, Object>) JSONObject.parseObject(ticketInfo, java.util.Map.class).get("ticketInfo"); Set<Entry<String, Object>> entries = ticketInfos.entrySet( ); if (entries != null) { Iterator<Entry<String, Object>> iterator = entries.iterator( ); while (iterator.hasNext( )) { Entry<String, Object> entry = iterator.next(); String key = (String) entry.getKey( ); String trains[] = key.split("/"); for(int i = 0; i< trains.length; i++) { train.put(trains[i], "unused"); } } } } catch (Exception e) { log.info("根据车站信息得到车次信息报错:"+e.getMessage()); } } /** * 解析得到车站信息 * @param str * @return */ private String getStation(String str) { if (str == null || str.length() <= 0) { return ""; } int pos = -1; pos = str.indexOf("</a>", pos + 1); if (pos == -1) { return ""; } return str.substring(0, pos); } /** * 根据车次获取车次详情,并保存入库 * * @param key * @param stationTmp */ public void getAndSaveTrainsDetails(String key, HashSet<String> stationTmp) { String url = "http://train.qunar.com/qunar/checiInfo.jsp?q=" + key + "&date=20170107&format=json"; String trainScheduleBody = HttpClientUtil.getUtf8(url); try { List<Object> ticketInfos = (List<Object>) JSONObject.parseObject(trainScheduleBody, Map.class) .get("trainScheduleBody"); for (int i = 0; i < ticketInfos.size(); i++) { List<String> list = (List<String>) JSONObject.parseObject(ticketInfos.get(i).toString(), Map.class) .get("content"); if (list == null || list.size() <= 0) { continue; } stationTmp.add(list.get(1)); // 得到详情,更新入库 //TODO } } catch (Exception e) { log.info("根据车次获取车次详情报错:" + e.getMessage()); } } }