WEB开发网
开发学院软件开发Java Google爬取天气预报代码 阅读

Google爬取天气预报代码

 2009-09-17 00:00:00 来源:WEB开发网   
核心提示:用了国内的几个web service的天气预报服务,打着中央气象局的幌子,本来用的还好好的,过了几天,发现不能调用了,原来是服务器超过请求次数了,对国内的Free服务深感失望,心想还是用一直信赖的Google吧,于是有了这份Google爬取天气预报代码

用了国内的几个web service的天气预报服务,打着中央气象局的幌子,本来用的还好好的,过了几天,发现不能调用了,原来是服务器超过请求次数了,Free到这种程度,对国内的Free服务深感失望,心想还是用一直信赖的Google吧,所以就有了以下利用Http请求爬取Google天气预报的代码,并将请求过的城市天气预报按天缓存一下:

所有代码如下:

Java代码  

 /**
  * Parses the resource at {@code htmlUrl} (decoded as GBK) and returns the
  * children of the {@code <div>} whose {@code class} attribute is exactly "e".
  *
  * If several such divs exist, the children of the last one are returned.
  *
  * @param htmlUrl location handed to the HTML parser
  * @return the child nodes of the matching div, or {@code null} when no div
  *         matches or parsing fails (the failure is printed, not rethrown)
  */
 public NodeList getWeatherDiv(String htmlUrl) {
  NodeList weatherChildren = null;
  try {
   Parser pageParser = new Parser(htmlUrl);
   pageParser.setEncoding("GBK");

   OrFilter divOnly = new OrFilter();
   divOnly.setPredicates(new NodeFilter[] { new NodeClassFilter(Div.class) });

   Node[] candidates = pageParser.parse(divOnly).toNodeArray();
   for (Node candidate : candidates) {
    if (!(candidate instanceof Div)) {
     continue;
    }
    Div div = (Div) candidate;
    // Constant-first equals also covers a missing (null) class attribute.
    if ("e".equals(div.getAttribute("class"))) {
     // No break: a later match intentionally overrides an earlier one.
     weatherChildren = div.getChildren();
    }
   }
  } catch (ParserException e) {
   e.printStackTrace();
  }
  return weatherChildren;
 }
   
  /**
   * Starts, at most once, a timer that purges outdated entries from
   * {@code hmCache}.
   *
   * The task runs immediately and then every 24 hours. Cache keys are built as
   * {@code city + yyyyMMdd}, so an entry is considered outdated when its key
   * does not contain today's date stamp. (The previous implementation had the
   * test inverted: it dropped today's entries and kept every stale one.)
   *
   * Subsequent calls are no-ops once {@code isStart} has been set.
   *
   * NOTE(review): the task iterates {@code hmCache} on the timer thread while
   * {@code getWeather} may write to it; if {@code hmCache} is a plain
   * {@code HashMap} this is not thread-safe - confirm its declaration.
   */
  public static void cleanCache() {
    if (isStart) {
      return;
    }
    isStart = true;
    TimerTask task = new TimerTask() {
      @Override
      public void run() {
        // Same for every entry in this pass, so format it once.
        String today = DateTimeUtil.format(new Date(), "yyyyMMdd");
        Iterator<?> it = hmCache.keySet().iterator();
        while (it.hasNext()) {
          // String.valueOf tolerates a null key instead of throwing.
          String key = String.valueOf(it.next());
          if (key.indexOf(today) < 0) {
            // Iterator.remove is the only safe removal during iteration;
            // no additional hmCache.remove(key) is needed.
            it.remove();
          }
        }
      }
    };
    Timer timer = new Timer();
    timer.schedule(task, Calendar.getInstance().getTime(), 24 * 3600 * 1000);
  }
 
   
  /**
   * Extracts one day's weather from an HTML fragment and stores it in
   * {@code json}.
   *
   * Keys are prefixed with "t" when {@code flag} is 0, otherwise with
   * "t" + flag. For every image in the fragment the image's {@code title} is
   * stored under {@code <prefix>_res} and {@code <prefix>_result}, and its URL
   * (prefixed with the Google host) under {@code <prefix>_tp}. Every text node
   * containing "°C" at an index greater than zero is stored under the bare
   * prefix. Later nodes overwrite values written by earlier ones.
   *
   * @param json        object receiving the extracted values
   * @param flag        index used to build the key prefix
   * @param htmlContent HTML fragment to scan, parsed as GBK
   */
  private void addWeatherDay(JSONObject json,int flag,String htmlContent){
    final String keyPrefix = flag == 0 ? "t" : "t" + flag;
    try {
      Parser fragmentParser = Parser.createParser(htmlContent, "GBK");

      OrFilter textOrImage = new OrFilter();
      textOrImage.setPredicates(new NodeFilter[] {
          new NodeClassFilter(TextNode.class),
          new NodeClassFilter(ImageTag.class) });

      for (Node node : fragmentParser.parse(textOrImage).toNodeArray()) {
        if (node instanceof ImageTag) {
          ImageTag icon = (ImageTag) node;
          String condition = icon.getAttribute("title");
          json.put(keyPrefix + "_res", condition);
          json.put(keyPrefix + "_result", condition);
          json.put(keyPrefix + "_tp", ("http://www.google.cn" + icon.getImageURL()));
        } else if (node instanceof TextNode) {
          String content = ((TextNode) node).getText();
          // Strictly greater than zero: text that starts with "°C" is skipped.
          if (content.indexOf("°C") > 0) {
            json.put(keyPrefix, content);
          }
        }
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
   
 /**
  * Scans an HTML fragment for per-day weather divs and feeds each one to
  * {@code addWeatherDay}.
  *
  * A div qualifies when its {@code align} attribute is "center" and its
  * {@code class} attribute normalizes to the empty string. Qualifying divs
  * are numbered 0, 1, 2, ... in document order and that number is passed as
  * the day index.
  *
  * @param json        object that receives the extracted values
  * @param htmlContent HTML fragment to scan, parsed as GBK
  */
 private void getDivText(JSONObject json, String htmlContent) {
  try {
   OrFilter divOnly = new OrFilter();
   divOnly.setPredicates(new NodeFilter[] { new NodeClassFilter(Div.class) });
   Node[] divs = Parser.createParser(htmlContent, "GBK").parse(divOnly).toNodeArray();

   int dayIndex = 0;
   for (Node node : divs) {
    if (!(node instanceof Div)) {
     continue;
    }
    Div div = (Div) node;
    // formatNullStr presumably maps a missing attribute to "" - verify in StrCharUtil.
    String cssClass = StrCharUtil.formatNullStr(div.getAttribute("class"));
    String alignment = StrCharUtil.formatNullStr(div.getAttribute("align"));
    if (alignment.equals("")) {
     continue;
    }
    if (cssClass.equals("") && alignment.equals("center")) {
     addWeatherDay(json, dayIndex, div.getChildrenHTML());
     dayIndex++;
    }
   }
  } catch (ParserException pe) {
   pe.printStackTrace();
  }
 }
   
 /**
  * Returns the weather forecast for {@code city}, served from the per-day
  * cache when available and otherwise scraped from a Google search page.
  *
  * The cache key is the caller-supplied city name followed by today's date
  * ({@code yyyyMMdd}). The key is computed once, before the name is passed
  * through {@code getCityName}, so the lookup and the store always use the
  * same key. (Previously the store used the converted name while the lookup
  * used the original one, so a converted city never hit the cache.)
  *
  * Any failure while fetching or parsing is printed and swallowed; the result
  * then contains only the empty "zhishu" entry. The result is cached either
  * way, as before.
  *
  * @param city city name as supplied by the caller
  * @return the cached or freshly built forecast object, never {@code null}
  */
 public JSONObject getWeather(String city){
  String today = DateTimeUtil.format(new Date(), "yyyyMMdd");
  String cacheKey = city + today;

  JSONObject cached = hmCache.get(cacheKey);
  if (cached != null) {
   return cached;
  }

  JSONObject hm = new JSONObject();
  hm.put("zhishu", "");

  try {
   // Keep the converted name local so the cache key above stays valid.
   String queryCity = getCityName(city);
   final String googleWeatherURL = "http://www.google.cn/search?hl=zh-CN&newwindow=1&q=tq+"
     + URLEncoder.encode(queryCity, "UTF-8") + "&aq=f&oq=";

   NodeList nodeListDiv = getWeatherDiv(googleWeatherURL);
   if (nodeListDiv != null) {
    getDivText(hm, nodeListDiv.toHtml());
   }
  } catch (Exception ex) {
   ex.printStackTrace();
  }

  hmCache.put(cacheKey, hm);
  return hm;
 }

Tags:Google 天气预报 代码

编辑录入:爽爽 [复制链接] [打 印]
赞助商链接