用httpclient抓取火车票信息

Bianca ·
更新时间:2024-11-14
· 855 次阅读

  一个通过httpclient抓取火车票信息的程序,需要修改下才能跑通,需要自己封装下httpclient,然后用get方式调用,还有fastJson,需要自己去解析下获得的数据,catchTrainInfo()是入口方法   import java.io.UnsupportedEncodingException;   import java.net.URLEncoder;   import java.util.HashMap;   import java.util.HashSet;   import java.util.Iterator;   import java.util.Map;   import java.util.Map.Entry;   import javax.annotation.Resource;   import org.slf4j.Logger;   import org.slf4j.LoggerFactory;   import java.util.Set;   public class CatchTrainInfo {   /**   * 抓取火车票信息   * @param attachmentId   * @return   */   @SuppressWarnings("unchecked")   public  String catchTrainInfo() {   //车站信息   HashSet<String> station = new HashSet<String>();   //临时车站信息   HashSet<String> stationTmp = new HashSet<String>();   //车次信息   Map<String,String> train = new HashMap<String,String>();   String stationHtml = HttpClientUtil.get("http://www.59178.com/zhan/");   String stationName = "";   //截取车站信息   String stations[] = stationHtml.split("htm'>");   for (int i = 0; i < stations.length; i++) {   if (i == 0 ) {   continue;   }   stationName = getStation(stations[i]);   if("".equals(stationName)) {   continue;   }   station.add(stationName);   }   //循环,根据车站信息获取车次信息   getTrainsByStation(station,train);   //循环,根据车次获取车次详情,并保存入库   getTrainDetailsByTrains(stationTmp,train);   //继续执行3次循环,(本来应该stationTmp.size()==0的时候,防止无限循环)   int loop = 3;   while (loop > 0) {   //stationTmp 和 station比较,stationTmp去掉已经遍历过的站点,station用stationTmp替代,继续循环   HashSet<String> stationTmp1 = (HashSet<String>) stationTmp.clone();   for (String stationTmpElement : stationTmp) {   for (String stationElement : station) {   if(stationTmpElement.equals(stationElement)) {   stationTmp.remove(stationTmpElement);   }   }   }   station = stationTmp1;   //循环,根据车站信息获取车次信息   getTrainsByStation(stationTmp,train);   //循环,根据车次获取车次详情,并写入数据库   getTrainDetailsByTrains(stationTmp,train);   loop--;   }   return "success!";   }   /**   * 根据车次获取车次详情,并写入数据库   * @param stationTmp   * @param train   */   private  void getTrainDetailsByTrains(HashSet<String> stationTmp, Map<String, String> train) {   Iterator<Entry<String, String>> iterator = train.entrySet().iterator();   while (iterator.hasNext()) {   Entry<String, String> entry = iterator.next();   String key =  entry.getKey();   String value =  entry.getValue();   if("unused".equals(value)) {   getAndSaveTrainsDetails(key,stationTmp);   entry.setValue("used");   }   try {   //休眠一会,防止反爬虫   Thread.sleep(100);   } catch (InterruptedException e) {   e.printStackTrace();   }   }   }   /**   * 根据车站信息获取车次信息   * @param station   * @param train   */   private  void getTrainsByStation(HashSet<String> station, Map<String, String> train) {   Iterator<String> iterator = station.iterator();   while (iterator.hasNext()) {   String key = iterator.next();   getTrainsInfo(train,key);   try {   //休眠一会,防止反爬虫   Thread.sleep(100);   } catch (InterruptedException e) {   e.printStackTrace();   }   }   }   /**   * 根据车站信息得到车次信息   * @param train   * @param stationName   * @return   */   private  void getTrainsInfo(Map<String, String> train, String stationName) {   try {   stationName = URLEncoder.encode(stationName,"utf-8");   } catch (UnsupportedEncodingException e1) {   log.error("转码出错了!stationName:" + stationName);   }   String url = "http://train.qunar.com/qunar/stationInfo.jsp?q=" + stationName + "&format=json";   //根据车站信息获取车次  ticketInfo   String ticketInfo = HttpClientUtil.get(url);   try{   @SuppressWarnings("unchecked")   java.util.Map<String, Object> ticketInfos = (Map<String, Object>) JSONObject.parseObject(ticketInfo, java.util.Map.class).get("ticketInfo");   Set<Entry<String, Object>> entries = ticketInfos.entrySet( );   if (entries != null) {   Iterator<Entry<String, Object>> iterator = entries.iterator( );   while (iterator.hasNext( )) {   Entry<String, Object> entry = iterator.next();   String key = (String) entry.getKey( );   String trains[] = key.split("/");   for(int i = 0; i< trains.length; i++) {   train.put(trains[i], "unused");   }   }   }   } catch (Exception e) {   log.info("根据车站信息得到车次信息报错:"+e.getMessage());   }   }   /**   * 解析得到车站信息   * @param str   * @return   */   private  String getStation(String str) {   if (str == null || str.length() <= 0) {   return "";   }   int pos = -1;   pos = str.indexOf("</a>", pos + 1);   if (pos == -1) {   return "";   }   return str.substring(0, pos);   }   /**   * 根据车次获取车次详情,并保存入库   *   * @param key   * @param stationTmp   */   public void getAndSaveTrainsDetails(String key, HashSet<String> stationTmp) {   String url = "http://train.qunar.com/qunar/checiInfo.jsp?q=" + key + "&date=20170107&format=json";   String trainScheduleBody = HttpClientUtil.getUtf8(url);   try {   List<Object> ticketInfos = (List<Object>) JSONObject.parseObject(trainScheduleBody, Map.class)   .get("trainScheduleBody");   for (int i = 0; i < ticketInfos.size(); i++) {   List<String> list = (List<String>) JSONObject.parseObject(ticketInfos.get(i).toString(), Map.class)   .get("content");   if (list == null || list.size() <= 0) {   continue;   }   stationTmp.add(list.get(1));   // 得到详情,更新入库   //TODO    }   } catch (Exception e) {   log.info("根据车次获取车次详情报错:" + e.getMessage());   }   }   }



火车 火车票 httpclient

需要 登录 后方可回复, 如果你还没有账号请 注册新账号