java開發項目集錦(附源碼)Word版

資源ID：52157201 資源大小：234.50KB 全文頁數：70頁
資源格式： DOC 下載積分：0積分

快捷下載

會員登錄下載

微信登錄下載

三方登錄下載：

微信掃一掃登錄

下載資源需要0積分

郵箱/手機：
溫馨提示：	用戶名和密碼都是您填寫的郵箱或者手機號，方便查詢和重復下載（系統自動生成）
支付說明：	本站最低充值0.01積分，下載本資源后余額將會存入您的賬戶，您可在我的個人中心查看。
驗證碼：	換一換

賬號：
密碼：
驗證碼：	換一換
當日自動登錄忘記密碼？

友情提示

1、下載資料失敗解決辦法

2、PDF文件下載后，可能會被瀏覽器默認打開，此種情況可以點擊瀏覽器菜單，保存網頁到桌面，就可以正常下載了。

3、本站不支持迅雷下載，請使用電腦自帶的IE瀏覽器，或者360瀏覽器、谷歌瀏覽器下載即可。

4、本站資源下載后的文檔和圖紙-無水印,預覽文檔經過壓縮，下載后原文更清晰。

5、試題試卷類文檔，如果標題沒有明確說明有答案則都視為沒有答案，請知曉。

網站客服

侵權投訴

java開發項目集錦(附源碼)Word版

新浪天氣預(yù)報(bào)新聞java抓去程序package .weather1;import java.io.BufferedReader;import java.io.ByteArrayOutputStream;import java.io.File;import java.io.FileWriter;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.PrintWriter;import .URL;import .URLConnection;import java.util.regex.Matcher;import java.util.regex.Pattern;import mons.logging.Log;import mons.logging.LogFactory;import .update.Getdata;/* * 正則方式抓取新浪天氣新聞上的新聞 * 地址 * param args */public class Newlist private static final Log log = LogFactory.getLog(Newlist.class); /* * 測(cè)試 * param args */ public static void main(String args) Newlist n=new Newlist(); String k=n.getNewList(); for (int i=0;i<k.length;i+) System.out.println(ki.replace("href="", "href="newinfo2.jsp?url="); String m=n.getNewinfo("news/2008/1119/35261.html"); for (int l=0;l<m.length;l+) System.out.println(ml); /* * 由url地址獲得新聞內(nèi)容string推薦精選 * 新聞中的圖片下載到本地，文中新聞地址改成本地地址 * param url * return */ public String getNewinfo(String url) String URL=" /30是指取30段滿足給出的正則條件的字符串，如果只找出10個(gè)，那數(shù)組后面的全為null String s = analysis("<p>(.*?)</p>" , getContent(URL) , 30); for (int i=0;i<s.length;i+) Pattern sp = Ppile("src="(.*?)""); Matcher matcher = sp.matcher(si); if (matcher.find() String imageurl=analysis("src="(.*?)"" , si , 1)0; if(!imageurl.startsWith("http:/") imageurl=" System.out.println("新聞?dòng)袌D片:"+imageurl); String content=getContent(imageurl); String images=imageurl.split("/"); String imagename=imagesimages.length-1; System.out.println("圖片名:"+imagename); try File fwl = new File(imagename); PrintWriter outl = new PrintWriter(fwl); outl.println(content); outl.close(); catch (IOException e) / TODO Auto-generated catch block e.printStackTrace(); System.out.println("si:"+si); /修改文件圖片地址 si=si.replace(analysis("src="(.*?)"" , si , 1)0, imagename); return s; public String getNewList()推薦精選 String url=" return getNewList(getContent(url); private String getNewList(String content ) /String s = 
analysis("align="center" valign="top"><img src="./images/a(.*?).gif" width="70" height="65"></td>" , content , 50); String s = analysis("<li>(.*?)</li>" , content , 50); return s; private String analysis(String pattern, String match , int i) Pattern sp = Ppile(pattern); Matcher matcher = sp.matcher(match); String content = new Stringi; for (int i1 = 0; matcher.find(); i1+) contenti1 = matcher.group(1); /下面一段是為了剔除為空的串 int l=0; for (int k=0;k<content.length;k+) if (contentk=null) l=k; break; String content2; if (l!=0) content2=new Stringl; for (int n=0;n<l;n+) content2n=contentn; return content2; else return content; /* * 由地址獲取網(wǎng)頁(yè)內(nèi)容 * param strUrl * return private String getContent(String strUrl) try /URL url = new URL(strUrl); 推薦精選 /BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream(); URLConnection uc = new URL(strUrl).openConnection(); /通過(guò)修改http頭的User-Agent來(lái)偽裝成是通過(guò)瀏覽器提交的請(qǐng)求 uc.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows XP; DigExt)"); System.out.println("-"); System.out.println("Content-Length: "+uc.getContentLength(); System.out.println("Set-Cookie: "+uc.getHeaderField("Set-Cookie"); System.out.println("-"); /獲取文件頭信息 System.out.println("Header"+uc.getHeaderFields().toString(); System.out.println("-"); BufferedReader br=new BufferedReader(new InputStreamReader(uc.getInputStream(), "gb2312"); String s = "" StringBuffer sb=new StringBuffer(); while(s = br.readLine()!=null) sb.append(s+"rn"); System.out.println("長(zhǎng)度+"+sb.toString().length(); return sb.toString(); catch(Exception e) return "error open url" + strUrl; */ public static String getContent (String strUrl) URLConnection uc = null; String all_content=null; try all_content =new String(); URL url = new URL(strUrl); uc = url.openConnection(); uc.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows XP; DigExt)"); System.out.println("-"); System.out.println("Content-Length: 
"+uc.getContentLength(); System.out.println("Set-Cookie: "+uc.getHeaderField("Set-Cookie"); 推薦精選 System.out.println("-"); /獲取文件頭信息 System.out.println("Header"+uc.getHeaderFields().toString(); System.out.println("-"); if (uc = null) return null; InputStream ins = uc.getInputStream(); ByteArrayOutputStream outputstream = new ByteArrayOutputStream(); byte str_b = new byte1024; int i = -1; while (i=ins.read(str_b) > 0) outputstream.write(str_b,0,i); all_content = outputstream.toString(); / System.out.println(all_content); catch (Exception e) e.printStackTrace(); log.error("獲取網(wǎng)頁(yè)內(nèi)容出錯(cuò)"); finally uc = null; / return new String(all_content.getBytes("ISO8859-1"); System.out.println(all_content.length(); return all_content; 現(xiàn)在的問(wèn)題是:圖片下載不全，我用后面兩種getContent方法下圖片，下來(lái)的圖片大小都和文件頭里獲得的Content-Length，也就是圖片的實(shí)際大小不符，預(yù)覽不了。而且反復(fù)測(cè)試，兩種方法每次下來(lái)的東西大小是固定的，所以重復(fù)下載沒(méi)有用？測(cè)試toString后length大小比圖片實(shí)際的小，而生成的圖片比圖片數(shù)據(jù)大。下載后存儲(chǔ)過(guò)程中圖片數(shù)據(jù)增加了！圖片數(shù)據(jù)流toString過(guò)程中數(shù)據(jù)大小發(fā)生了改變，還原不回來(lái)。其它新聞內(nèi)容沒(méi)有問(wèn)題。估計(jì)是圖片的編碼格式等的問(wèn)題。在圖片數(shù)據(jù)流讀過(guò)來(lái)時(shí)直接生成圖片就可以了。public int saveImage (String strUrl) URLConnection uc = null; try URL url = new URL(strUrl); uc = url.openConnection(); uc.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows XP; DigExt)"); 推薦精選 /uc.setReadTimeout(30000); /獲取圖片長(zhǎng)度 /System.out.println("Content-Length: "+uc.getContentLength(); /獲取文件頭信息 /System.out.println("Header"+uc.getHeaderFields().toString(); if (uc = null) return 0; InputStream ins = uc.getInputStream(); byte str_b = new byte1024; int byteRead=0; String images=strUrl.split("/"); String imagename=imagesimages.length-1; File fwl = new File(imagename); FileOutputStream fos= new FileOutputStream(fwl); while (byteRead=ins.read(str_b) > 0) fos.write(str_b,0,byteRead); ; fos.flush(); fos.close(); catch (Exception e) e.printStackTrace(); log.error("獲取網(wǎng)頁(yè)內(nèi)容出錯(cuò)"); 
finally uc = null; return 1; 方法二：首先把搜索后的頁(yè)面用流讀取出來(lái)，再寫(xiě)個(gè)正則，去除不要的內(nèi)容，再把最后的結(jié)果存成xml格式文件、或者直接存入數(shù)據(jù)庫(kù)，用的時(shí)候再調(diào)用本代碼只是顯示html頁(yè)的源碼內(nèi)容，如果需要抽取內(nèi)容請(qǐng)自行改寫(xiě)public static String regex()中的正則式 package rssTest; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import .HttpURLConnection; import .MalformedURLException; 推薦精選import .URL; import .URLConnection; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class MyRSS /* * 獲取搜索結(jié)果的html源碼 * */ public static String getHtmlSource(String url) StringBuffer codeBuffer = null; BufferedReader in=null; try URLConnection uc = new URL(url).openConnection(); /* * 為了限制客戶端不通過(guò)網(wǎng)頁(yè)直接讀取網(wǎng)頁(yè)內(nèi)容,就限制只能從瀏覽器提交請(qǐng)求. * 但是我們可以通過(guò)修改http頭的User-Agent來(lái)偽裝,這個(gè)代碼就是這個(gè)作用 * */ uc.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows XP; DigExt)"); / 讀取url流內(nèi)容 in = new BufferedReader(new InputStreamReader(uc .getInputStream(), "gb2312"); codeBuffer = new StringBuffer(); String tempCode = "" / 把buffer內(nèi)的值讀取出來(lái),保存到code中 while (tempCode = in.readLine() != null) codeBuffer.append(tempCode).append("n"); in.close(); catch (MalformedURLException e) 推薦精選 e.printStackTrace(); catch (IOException e) e.printStackTrace(); return codeBuffer.toString(); /* * 正則表達(dá)式 * */ public static String regex() String googleRegex = "<div class=g>(.*?)href="(.*?)"(.*?)">(.*?)</a>(.*?)<div class=std>(.*?)<br>" return googleRegex; /* * 測(cè)試用 * 在google中檢索關(guān)鍵字，并抽取自己想要的內(nèi)容 * * */ public static List<String> GetNews() List<String> newsList = new ArrayList<String>(); String allHtmlSource = MyRSS .getHtmlSource(" maxthon&hs=SUZ&q=%E8%A7%81%E9%BE%99%E5%8D%B8%E7%94%B2&meta=&aq=f"); Pattern pattern = Ppile(regex(); Matcher matcher = pattern.matcher(allHtmlSource); while (matcher.find() String urlLink = matcher.group(2); String title = 
matcher.group(4); title = title.replaceAll("<font color=CC0033>", ""); title = title.replaceAll("</font>", ""); title = title.replaceAll("<b>.</b>", ""); 推薦精選 String content = matcher.group(6); content = content.replaceAll("<font color=CC0033>", ""); content = content.replaceAll("</font>", ""); content = content.replaceAll("<b>.</b>", ""); newsList.add(urlLink); newsList.add(title); newsList.add(content); return newsList; /* * main方法 * */ public static void main(String args) System.out .println(MyRSS .getHtmlSource(" 方法三：jsp自動(dòng)抓取新聞自動(dòng)抓取新聞package com.news.spider;import java.io.File;import java.io.FileFilter;import java.text.SimpleDateFormat;import java.util.ArrayList;import java.util.Calendar;import java.util.Date;import java.util.List;import java.util.regex.Matcher;import java.util.regex.Pattern;import com.db.DBAccess;public class SpiderNewsServer public static void main(String args) throws Exception /設(shè)置抓取信息的首頁(yè)面 String endPointUrl = " /獲得當(dāng)前時(shí)間推薦精選 Calendar calendar=Calendar.getInstance(); SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd"); String DateNews = sdf.format(calendar.getTime(); /* * 抓取二級(jí)URl 開(kāi)始 * url匹配類(lèi)型：" */ List listNewsType = new ArrayList(); /取入口頁(yè)面html WebHtml webHtml = new WebHtml(); String htmlDocuemtnt1 = webHtml.getWebHtml(endPointUrl); if(htmlDocuemtnt1 = null | htmlDocuemtnt1.length() = 0) return; String strTemp1 = " String strTemp2 = "</li>" int stopIndex=0; int startIndex=0; int dd=0; while(true) dd+; startIndex = htmlDocuemtnt1.indexOf(strTemp1, stopIndex); System.out.println("="+startIndex); stopIndex= htmlDocuemtnt1.indexOf(strTemp2, startIndex); System.out.println("=-"+stopIndex); if(startIndex!=-1 && stopIndex!=-1) String companyType=htmlDocuemtnt1.substring(startIndex,stopIndex); System.out.println("-"+companyType); System.out.println("-"+companyType.indexOf("""); companyType=companyType.substring(0,companyType.indexOf("""); System.out.println("#-"+companyType); 
listNewsType.add(companyType); if(dd>10) break; if(stopIndex=-1 | startIndex=-1) break; System.out.println("listCompanyType="+listNewsType.size(); /*推薦精選 * 抓取二級(jí)URl 結(jié)束 */ /* * 抓取頁(yè)面內(nèi)容開(kāi)始 */ String title="" String hometext="" String bodytext="" String keywords="" String counter = "221" String cdate= "" int begainIndex=0;/檢索字符串的起點(diǎn)索引 int endIndex=0;/檢索字符串的終點(diǎn)索引 String begainStr;/檢索開(kāi)始字符串 String endStr;/檢索結(jié)束字符串 for (int rows = 1; rows < listNewsType.size(); rows+) String strNewsDetail = listNewsType.get(rows).toString(); System.out.println("strNewsDetail="+strNewsDetail); if(strNewsDetail != null && strNewsDetail.length() > 0) WebHtml newsListHtml = new WebHtml(); String htmlDocuemtntCom = newsListHtml.getWebHtml(strNewsDetail); System.out.println("$-"+htmlDocuemtntCom); if(htmlDocuemtntCom = null | htmlDocuemtntCom.length() = 0) return; /截取時(shí)間 int dateBegainIndex = htmlDocuemtntCom.indexOf("<div>時(shí)間："); System.out.println("%-"+dateBegainIndex); String newTime = htmlDocuemtntCom.substring(dateBegainIndex,dateBegainIndex+20); System.out.println("-"+newTime); String newTimeM = newTime.substring(newTime.lastIndexOf("-")+1,newTime.lastIndexOf("-")+3); String dateM = DateNews.substring(DateNews.lastIndexOf("-")+1); System.out.println("-"+newTimeM); System.out.println("-"+dateM); if(newTimeM = dateM | newTimeM.equals(dateM) /檢索新聞標(biāo)題 begainStr="<div class="divCon bg008 ">" 推薦精選 endStr="<div>時(shí)間：" begainIndex=htmlDocuemtntCom.indexOf(begainStr,0); System.out.println("&&&&&&-"+begainIndex); endIndex=htmlDocuemtntCom.indexOf(endStr,0); System.out.println("&&&&&&-"+endIndex); if(begainIndex!=-1 && endIndex!=-1) title = htmlDocuemtntCom.substring(begainIndex,endIndex).trim(); title = title.substring(title.indexOf("<h1>")+4,title.indexOf("</h1>"); title = title.replace("'", ""); title = title.replace("", ""); title = title.replace(" ", ""); /檢索新聞內(nèi)容 begainStr="<div class="divCon bg008 ">" endStr="<!- page begin ->" 
begainIndex=htmlDocuemtntCom.indexOf(begainStr,0); endIndex=htmlDocuemtntCom.indexOf(endStr,0); if(begainIndex!=-1 && endIndex!=-1) bodytext = htmlDocuemtntCom.substring(begainIndex,endIndex).trim(); if(bodytext.indexOf("<p>")>0 && bodytext.indexOf("</p>")>bodytext.indexOf("<p>") && bodytext.indexOf("</p>")>0) bodytext = bodytext.substring(bodytext.indexOf("<p>")+3,bodytext.indexOf("</p>"); bodytext=bodytext.replace(" ", ""); bodytext=bodytext.replace("<br>", ""); bodytext=bodytext.replace("n", "<br>"); bodytext=bodytext.replace("'", ""); bodytext=bodytext.replace("", ""); /簡(jiǎn)介 if(bodytext.length()>40) hometext = bodytext.substring(0,40)+"." else hometext = bodytext+"." /瀏覽量 String str = String.valueOf(Math.random(); counter = str.substring(str.lastIndexOf(".")+1,5); Calendar cal = Calendar.getInstance(); cal.setTime(new Date(); cdate = cal.getTimeInMillis()+""推薦精選 cdate = cdate.substring(0,10); else continue; System.out.println("-"+title); System.out.println("-"+cdate); System.out.println("-"+cdate); System.out.println("-"+hometext); System.out.println("-"+bodytext); System.out.println("-"+keywords); System.out.println("-"+counter); /*String str = "INSERT INTO ecim_stories(uid,title,created,published,hostname,hometext,bodytext,keywords,counter,topicid,ihome,notifypub,story_type,topicdisplay,topicalign,comments,rating,votes,description) " str += "VALUE (1,'"+title+"',"+cdate+","+cdate+",'125.122.83.177','"+hometext+"','"+bodytext+"','"+keywords+"',"+counter+",1,0,1,'admin',0,'R',0,0,0,'')" DBAccess db = new DBAccess(); if(db.executeUpdate(str)>0) System.out.println("-成功！"); else System.out.println("-失?。?quot;); */ /* * 抓取頁(yè)面內(nèi)容結(jié)束 */ package com.news.spider;import .URL;import .URLConnection;import java.io.BufferedReader;import java.io.InputStreamReader;public class WebHtml /* 根據(jù)url,抓取webhmtl內(nèi)容* param url推薦精選*/public String getWebHtml(String url) try UR

注意事項

本文（java開發項目集錦(附源碼)Word版）為本站會員（每****）主動上傳，裝配圖網僅提供信息存儲空間，僅對用戶上傳內容的表現方式做保護處理，對上載內容本身不做任何修改或編輯。若此文所含內容侵犯了您的版權或隱私，請立即通知裝配圖網（點擊聯系客服），我們立即給予刪除！

溫馨提示：如果因為網速或其他原因下載失敗請重新下載，重復下載不扣分。