使用 jsoup(抓取网页)和 Nutz 的 NutDao(写入数据库)来实现。

上代码

static NutDao dao = new NutDao();
	static {
	Properties pp = System.getProperties();
	pp.put("driverClassName", "com.mysql.jdbc.Driver");
	pp.put("url", "jdbc:mysql://localhost:3306/wordpress?useUnicode=true&characterEncoding=utf-8");
	pp.put("username", "root");
	pp.put("password", "000000");
	DataSource ds = null;
	try {
		ds = BasicDataSourceFactory.createDataSource(System.getProperties());
		dao.setDataSource(ds);
	} catch (Exception e) {
		e.printStackTrace();
	}
	}
	public static void main(String[] args) throws MalformedURLException, IOException, InterruptedException {

	//createSqlFile();
	//	System.out.println("你好${1}".replace("${1}", "world"));
	}

	public static  void createSqlFile() throws IOException{
		BufferedReader reader = new BufferedReader(new FileReader(new File("javaeyepost.txt")));
		String line = "";
		boolean b = false;
		String title = null;
		String content = null;
		String date = null;
		int index = 25;
		while((line=reader.readLine())!= null){
			if(b){
				break;
			}
			if(line.length()>0){
				if(line.equals("post****over")){

				}else if(line.startsWith("title: ")){
					title = line.substring(7);
				}else if(line.startsWith("date: ")){
					System.out.println(line);
					date = line.substring(6)+" 00:00:00";
				}else if(line.startsWith("content: ")){
					StringBuffer sb = new StringBuffer();
					sb.append(line.substring(9));
					while((line=reader.readLine())!= null){
						if(line.equals("post*****over")){
							content = sb.toString();
							String encode = URLEncoder.encode(title);
							encode = encode.length()>200? encode.substring(0,200):encode;
							dao.insert("wp_posts", Chain.make("post_author", 1).add("post_date", date)
									.add("post_date_gmt", date).add("post_content", content).add("post_title", title)
									.add("post_status", "publish").add("comment_status", "open").add("ping_status", "open")
									.add("post_name", encode).add("post_modified", date).add("post_modified_gmt", date)
									.add("post_parent", 0).add("guid", "http://localhost:89/?p="+ index++).add("menu_order", 0)
									.add("post_type", "post").add("comment_count", 0).add("post_excerpt", "").add("post_password", "")
									.add("to_ping", "").add("pinged", "").add("post_content_filtered", "").add("post_mime_type", ""));
						//	System.out.println("*************************************");
							//b = true;
							break;
						}else{
							sb.append(line);
						}
					}
				}
			}
		}

	}

	// 需要注意的细节, 帖子类别可能不存在,要抓取下面的分页信息才行
	public static void fetchPost() throws UnsupportedEncodingException, IOException, InterruptedException{
		//Document document = Jsoup.parse(new URL("http://feiyan35488.iteye.com/?show_full=false"), 5000);
		FileOutputStream fos = new FileOutputStream("javaeyepost.txt");
		HttpConnection con = (HttpConnection) HttpConnection.connect("http://feiyan35488.iteye.com/?page=9&&show_full=true");
		con.userAgent("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.12 (KHTML, like Gecko) Chrome/9.0.576.0 Safari/534.12");
		Document document = con.get();
		Elements es = document.getElementsByClass("blog_main");
		System.out.println("共有帖子 :"+es.size());
		for(Element e : es){
			fos.write(("title:"+e.child(0).child(2).child(0).html()+"\n").getBytes("utf-8"));
			if(e.child(0).children().size()>3)
			  fos.write(("classify: "+e.child(0).child(3).child(0).html()+"\n").getBytes("utf-8"));
			fos.write(("date: "+e.child(0).child(0).child(0).html()+"-"+e.child(0).child(0).child(2).html()+"-"+e.child(0).child(0).child(4).html()+"\n").getBytes("utf-8"));
			fos.write(("content: "+e.child(1).html()+"\n").getBytes("utf-8"));
			fos.write(("post*****over\n").getBytes("utf-8"));
			//System.out.println("标题:"+e.child(0).child(2).child(0).html());
		}

		for(int i=2;i<10;i++){
			Thread.sleep(5000);
			con.url("http://feiyan35488.iteye.com/?page="+i+"&&show_full=true");
			document = con.get();
			Elements es1 = document.getElementsByClass("blog_main");
			System.out.println("共有帖子 :"+es1.size());
			for(Element e : es1){
				fos.write(("title:"+e.child(0).child(2).child(0).html()+"\n").getBytes("utf-8"));
				fos.write(("classify: "+e.child(0).child(3).child(0).html()+"\n").getBytes("utf-8"));
				fos.write(("date: "+e.child(0).child(0).child(0).html()+"-"+e.child(0).child(0).child(2).html()+"-"+e.child(0).child(0).child(4).html()+"\n").getBytes("utf-8"));
				fos.write(("content: "+e.child(1).html()+"\n").getBytes("utf-8"));
				fos.write(("post*****over\n").getBytes("utf-8"));
				//System.out.println("标题:"+e.child(0).child(2).child(0).html());
			}
		}
	}

共两个方法:一、fetchPost 抓取帖子并保存到文件中;二、createSqlFile 从文件中读取帖子并保存到数据库中。