《JAVA爬蟲獲取網頁URL源碼》由會員分享,可在線閱讀,更多相關《JAVA爬蟲獲取網頁URL源碼(4頁珍藏版)》請在裝配圖網上搜索。
package fileAndStringOperate;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;

/**
 * Reads a text file of judged search results, counts the entries marked with
 * {@code Y}/{@code y}, and writes per-rank ratio tables for four result
 * sections (labelled baidu, bing, sogou and 360) to an output file.
 *
 * <p>NOTE(review): this class was reconstructed from a text extraction that
 * had lost all braces, brackets, quotes, backslashes and several operators.
 * Block boundaries and string literals were restored from context; items that
 * could not be determined with certainty carry their own NOTE(review) comment.
 */
public class ReadFileToString {

    /** Number of ranked entries per section; the arrays are 1-based, hence size + 1. */
    private static final int RESULTS_PER_ENGINE = 50;

    /** Every entry occupies this many lines; the judgement is read while the line counter is a multiple of it. */
    private static final int LINES_PER_RESULT = 3;

    /** Number of leading characters removed from a section-marker line by {@link #ClassifyURL}. */
    private static final int MARKER_PREFIX_LENGTH = 10;

    // NOTE(review): the extraction dropped the path separators ("D:IR1JAVA...") and
    // annotated the Chinese characters; confirm the directory layout and the exact file name.
    private static final String DEFAULT_INPUT_PATH = "D:\\IR1\\JAVA網絡爬蟲.txt";

    // NOTE(review): confirm the exact file name (same caveat as the input path).
    private static final String OUTPUT_PATH = "D:/JAVA網絡爬蟲.txt";

    /**
     * Runs the analysis.
     *
     * @param args optional; {@code args[0]} overrides the built-in input path
     * @throws IOException if the input file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String fileData = readtxt(inputPath);
        CountRelatedDocument(fileData);
        ClassifyURL(fileData);
    }

    /**
     * Reads a whole text file into one string, terminating every line with {@code '\n'}.
     *
     * @param filePath path of the file to read
     * @return the file content; empty if the file has no lines
     * @throws IOException if the file cannot be opened or read
     */
    private static String readtxt(String filePath) throws IOException {
        StringBuilder text = new StringBuilder();
        // try-with-resources: the original never closed the reader.
        try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = br.readLine()) != null) {
                text.append(line).append('\n');
            }
        }
        return text.toString();
    }

    /**
     * Reads a text file into one {@code char[]} per line (line terminators excluded).
     *
     * <p>The original indexed an array that was still {@code null} and stored the
     * cumulative text rather than the current line, so it could not succeed on a
     * non-empty file.
     * NOTE(review): one row per line is the presumed intent; no caller is visible.
     *
     * @param filePath path of the file to read
     * @return the lines of the file as character arrays; an empty array for an empty file
     * @throws IOException if the file cannot be opened or read
     */
    private static char[][] readtxtToChar(String filePath) throws IOException {
        String text = readtxt(filePath);
        if (text.isEmpty()) {
            return new char[0][];
        }
        // readtxt ends every line with '\n'; drop the last terminator so that the
        // split does not report a phantom trailing line. Limit -1 keeps blank lines.
        String[] lines = text.substring(0, text.length() - 1).split("\n", -1);
        char[][] data = new char[lines.length][];
        for (int i = 0; i < lines.length; i++) {
            data[i] = lines[i].toCharArray();
        }
        return data;
    }

    /**
     * Scans the file text and counts every {@code Y}/{@code y} character found
     * while the 1-based line counter is a multiple of three.
     *
     * <p>Each {@code #*} marker advances the section number; sections 1 to 4 are
     * counted as baidu, bing, sogou and 360. The per-section hit flags are then
     * passed to {@link #CaculatePR}, and the counters are printed.
     *
     * <p>Assumes each section holds at most 50 three-line entries and that the
     * marker lines fit into that three-line rhythm; otherwise the flag index
     * leaves the array bounds. TODO confirm against the input format.
     *
     * @param fileData full text of the input file with {@code '\n'} line ends
     * @return the total number of {@code Y}/{@code y} hits
     */
    private static int CountRelatedDocument(String fileData) {
        int cntRelated = 0;
        int currentLine = 1;
        int baiduCnt = 0;
        int bingCnt = 0;
        int sogouCnt = 0;
        int _360Cnt = 0;
        int state = 0;
        boolean[] baiduY = new boolean[RESULTS_PER_ENGINE + 1];
        boolean[] bingY = new boolean[RESULTS_PER_ENGINE + 1];
        boolean[] sogouY = new boolean[RESULTS_PER_ENGINE + 1];
        boolean[] _360Y = new boolean[RESULTS_PER_ENGINE + 1];

        char[] fileChar = fileData.toCharArray();
        for (int i = 0; i < fileChar.length; i++) {
            char c = fileChar[i];
            // Bounds check on the look-ahead: a '#' as the very last character
            // used to read past the end of the array.
            if (c == '#' && i + 1 < fileChar.length && fileChar[i + 1] == '*') {
                state++;
                System.out.println("分點:" + currentLine);
            }
            if (c == '\n') {
                currentLine++;
            }
            if (currentLine % LINES_PER_RESULT == 0 && (c == 'Y' || c == 'y')) {
                int entry = currentLine / LINES_PER_RESULT;
                switch (state) {
                    case 1:
                        baiduCnt++;
                        baiduY[entry] = true;
                        break;
                    case 2:
                        bingCnt++;
                        bingY[entry - RESULTS_PER_ENGINE] = true;
                        break;
                    case 3:
                        sogouCnt++;
                        sogouY[entry - 2 * RESULTS_PER_ENGINE] = true;
                        break;
                    case 4:
                        _360Cnt++;
                        _360Y[entry - 3 * RESULTS_PER_ENGINE] = true;
                        break;
                    default:
                        // Hits outside sections 1-4 only contribute to the total.
                        break;
                }
                cntRelated++;
            }
        }

        try {
            CaculatePR(baiduY, bingY, _360Y, sogouY, cntRelated);
        } catch (Exception e) {
            // Still best-effort (the counters below are printed regardless), but
            // the failure is now reported instead of being silently discarded.
            System.err.println("Failed to compute or write the ratio tables: " + e);
        }

        System.out.println("cntRelated = " + cntRelated);
        System.out.println("baiduCnt = " + baiduCnt);
        System.out.println("bingCnt = " + bingCnt);
        // The original printed this counter under the label "sohuCnt".
        System.out.println("sogouCnt = " + sogouCnt);
        System.out.println("_360Cnt = " + _360Cnt);
        System.out.println("currentLine = " + currentLine);
        return cntRelated;
    }

    /**
     * Strips the first ten characters from every line that starts with {@code #*}.
     *
     * <p>The trimmed lines are neither returned nor stored, so the method has no
     * observable effect at present. Lines shorter than ten characters are now
     * trimmed to empty instead of throwing.
     *
     * @param fileData full text of the input file with {@code '\n'} line ends
     */
    private static void ClassifyURL(String fileData) {
        String[] lines = fileData.split("\n");
        for (int i = 0; i < lines.length; i++) {
            if (lines[i].startsWith("#*")) {
                int cut = Math.min(MARKER_PREFIX_LENGTH, lines[i].length());
                lines[i] = lines[i].substring(cut);
            }
        }
    }

    /**
     * Computes two ratios per section for every rank {@code i} in 1..50 and
     * writes them to the output file, one tab-separated row per rank and section.
     *
     * <p>{@code P[i]} is the number of hits among the first {@code i} entries
     * divided by {@code i}; {@code R[i]} is that same number divided by
     * {@code cntRelated} (presumably precision and recall at rank {@code i}).
     * When {@code cntRelated} is 0 the {@code R} values are {@code NaN}, as in
     * the original.
     *
     * @param baiduY hit flags of section 1, indices 1..50
     * @param bingY hit flags of section 2, indices 1..50
     * @param _360Y hit flags of section 4, indices 1..50
     * @param sogouY hit flags of section 3, indices 1..50
     * @param cntRelated total number of hits over all sections
     * @throws IOException if the output file cannot be written
     */
    private static void CaculatePR(boolean[] baiduY, boolean[] bingY, boolean[] _360Y,
            boolean[] sogouY, int cntRelated) throws IOException {
        double[] baiduP = new double[RESULTS_PER_ENGINE + 1];
        double[] baiduR = new double[RESULTS_PER_ENGINE + 1];
        double[] bingP = new double[RESULTS_PER_ENGINE + 1];
        double[] bingR = new double[RESULTS_PER_ENGINE + 1];
        double[] sogouP = new double[RESULTS_PER_ENGINE + 1];
        double[] sogouR = new double[RESULTS_PER_ENGINE + 1];
        double[] _360P = new double[RESULTS_PER_ENGINE + 1];
        double[] _360R = new double[RESULTS_PER_ENGINE + 1];
        int cntBaidu = 0;
        int cntBing = 0;
        int cntSogou = 0;
        int cnt360 = 0;

        for (int i = 1; i <= RESULTS_PER_ENGINE; i++) {
            if (baiduY[i]) {
                cntBaidu++;
            }
            if (bingY[i]) {
                cntBing++;
            }
            if (sogouY[i]) {
                cntSogou++;
            }
            if (_360Y[i]) {
                cnt360++;
            }
            baiduP[i] = (double) cntBaidu / i;
            baiduR[i] = (double) cntBaidu / cntRelated;
            bingP[i] = (double) cntBing / i;
            bingR[i] = (double) cntBing / cntRelated;
            sogouP[i] = (double) cntSogou / i;
            sogouR[i] = (double) cntSogou / cntRelated;
            _360P[i] = (double) cnt360 / i;
            _360R[i] = (double) cnt360 / cntRelated;
        }

        CaculateMAP(baiduP, bingP, sogouP, _360P);

        File file = new File(OUTPUT_PATH);
        // try-with-resources: the writer is closed even if a write fails.
        try (FileWriter out = new FileWriter(file)) {
            writeRows(out, "baidu", baiduP, baiduR);
            writeRows(out, "bing", bingP, bingR);
            writeRows(out, "sogou", sogouP, sogouR);
            writeRows(out, "360", _360P, _360R);
        }
    }

    /** Writes one "label TAB p TAB r CRLF" row for every rank 1..50. */
    private static void writeRows(FileWriter out, String label, double[] p, double[] r)
            throws IOException {
        for (int i = 1; i <= RESULTS_PER_ENGINE; i++) {
            out.write(label + "\t" + p[i] + "\t" + r[i] + "\r\n");
        }
    }

    /**
     * Prints, for each section, the arithmetic mean of its per-rank {@code P}
     * values over ranks 1..50.
     *
     * @param baiduP per-rank values of section 1, indices 1..50
     * @param bingP per-rank values of section 2, indices 1..50
     * @param sogouP per-rank values of section 3, indices 1..50
     * @param _360P per-rank values of section 4, indices 1..50
     */
    private static void CaculateMAP(double[] baiduP, double[] bingP, double[] sogouP,
            double[] _360P) {
        double baiduPre = meanOverRanks(baiduP);
        double bingPre = meanOverRanks(bingP);
        double sogouPre = meanOverRanks(sogouP);
        double _360Pre = meanOverRanks(_360P);
        System.out.println("baidu:" + baiduPre + "bing:" + bingPre
                + "sogou:" + sogouPre + "360:" + _360Pre);
    }

    /** Returns the mean of {@code values[1..50]}; index 0 is unused. */
    private static double meanOverRanks(double[] values) {
        double sum = 0;
        for (int i = 1; i <= RESULTS_PER_ENGINE; i++) {
            sum += values[i];
        }
        return sum / RESULTS_PER_ENGINE;
    }
}