用Python抓取某些瀑布流网站下的某些画板的所有图片

2 个回答

给你个Ruby版本的吧

#!/usr/bin/env ruby
require 'rubygems'
require 'open-uri'
require 'nokogiri'

# Collects image URLs from a huaban.com board and prints them, one per line.
#
# Pages are requested one after another: the first request uses the bare board
# URL, and each following request passes the "data-id" of the last pin seen on
# the previous page as "?max=<id>&limit=20".
# NOTE(review): presumably the site returns the pins that come after <id>;
# confirm against the site's actual paging behaviour.
max = nil            # data-id of the last pin seen; nil until a page was read
image_urls = []
fetch_page_max = 5   # upper bound on the number of pages requested

fetch_page_max.times do
  max_string = max ? "?max=#{max}&limit=20" : ""
  url = "http://huaban.com/boards/7312#{max_string}"

  # URI.open is required here: the open-uri patch of Kernel#open was removed in
  # Ruby 3.0. The block form closes the underlying handle once parsing is done.
  doc = URI.open(url) { |io| Nokogiri::HTML(io) }

  pins = doc.css(".pin")
  # An empty page would leave the cursor unchanged, so every remaining
  # iteration would fetch the very same URL again; stop instead.
  break if pins.empty?

  pins.each do |pin|
    link = pin.at_css("a.img")
    # Skip pins without a link, and pins whose link is the "/null/" placeholder.
    next if link.nil? || link[:href] == "/null/"

    src = pin.at_css("a.img img")&.[](:src)
    image_urls << src if src
  end

  max = pins.last["data-id"]
  # Without a cursor value the next page cannot be requested.
  break if max.nil? || max.to_s.empty?
end

image_urls.each do |image_url|
  puts image_url
end