130 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Ruby
		
	
	
	
			
		
		
	
	
			130 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Ruby
		
	
	
	
# encoding: utf-8
 | 
						|
 | 
						|
require 'open-uri'
require 'rss'
require 'mongo'
 | 
						|
 | 
						|
# Feed author name => site number. The site number is appended to
# DB_BASE_NAME to pick that site's database.
SITES = {
  "總務處-各單位公告" => "0",
  "總務處-文書組" => "1",
  "總務處-出納組" => "2",
  "總務處-事務組" => "3",
  "總務處-保管組" => "5",
  "總務處-採購組" => "6",
  "總務處-經營管理組" => "7",
  "總務處-駐衛警察隊" => "9",
  "總務處-營繕組" => "10",
  "總務處-總務處" => "11",
  "社會科學院-社會科學院總務分處" => "4",
  "醫學院-醫學院總務分處" => "8"
}.freeze

# Author names whose items are imported.
SITE_KEYS = SITES.keys.freeze

# Prefix of every per-site database name ("<prefix>_<site number>").
DB_BASE_NAME = "production".freeze
 | 
						|
 | 
						|
# Walk the announcement feed page by page and collect every item published
# within the last 24 hours whose author is one of the SITES keys.
# The loop stops at the first item that is not newer than `yesterday`, so it
# assumes the feed lists the newest items first.
#
# Result shape:
#   all[author][link] = { title:, author:, link:, date:, category:, description: }
all = {}
more_pages = true
page = 1
yesterday = Time.now - 86_400

while more_pages
  # URI#open comes from open-uri; unlike Kernel#open it also fetches URLs on Ruby 3+.
  URI.parse("http://ann.cc.ntu.edu.tw/asp/rss.asp?page=#{page}").open do |rss|
    # The feed is served as Big5: transcode it, repair the misspelled weekday
    # that breaks date parsing, and relabel the XML declaration to match.
    xml = rss.read.encode('utf-8', 'big5', invalid: :replace, undef: :replace, replace: '')
    xml = xml.gsub('<pubDate>Wes,', '<pubDate>Wed,')
    xml = xml.gsub(/(encoding=\"big5\")/, 'encoding="utf-8"')

    feed = RSS::Parser.parse(xml)
    items = feed ? feed.items : []

    # An empty or unparseable page means there is nothing further to read;
    # without this check the loop would keep requesting pages forever.
    more_pages = false if items.empty?

    items.each do |item|
      published = item.pubDate
      next if published.nil? # undated items cannot be compared; skip them

      unless published > yesterday
        more_pages = false
        break
      end

      # Strip before the lookup so a padded author name still matches.
      author = item.author.to_s.strip
      next unless SITES.key?(author)

      link = item.link.strip
      category = item.category.to_s.gsub(/\<(\/)*category\>/, '')

      (all[author] ||= {})[link] = {
        title: item.title.strip,
        author: author,
        link: link,
        date: published,
        category: category,
        description: item.description.to_s.gsub("\r\n", '<br/>').strip
      }
    end
  end
  page += 1
end
 | 
						|
 | 
						|
# Get the id of the bulletin category named +category+, creating the
# category when it is not cached yet.
#
# category   - category name taken from the feed item.
# categories - Hash of "rss_<name>" => category id; a newly created category
#              is added to this same Hash (it is mutated in place).
# coll_cat   - category collection; must respond to #save(doc). Whatever
#              #save returns is stored and returned as the category id.
#
# Returns [category_id, categories].
def get_category_id(category, categories, coll_cat)
  key = "rss_#{category}"
  return [categories[key], categories] if categories.key?(key)

  # One timestamp for both fields so a new record has created_at == updated_at.
  now = Time.now
  cat = {
    _type: "BulletinCategory",
    key: key,
    disable: false,
    title: { :zh_tw => category },
    created_at: now,
    updated_at: now
  }
  result = coll_cat.save(cat)
  categories[key] = result
  [result, categories]
end
 | 
						|
 | 
						|
# Get the categories and collections of the site with the given number.
#
# site_number - String appended to DB_BASE_NAME to select the database
#               (defaults to "0").
#
# Returns [categories, coll_bulletin, coll_cat], where categories maps each
# stored bulletin category's 'key' to its '_id'.
def get_mongo_and_categories(site_number="0")
  # Every site database lives on the same server, so reuse one connection
  # instead of opening (and never closing) a new one on each call.
  @mongo_connection ||= Mongo::Connection.new("localhost", 27017)
  db = @mongo_connection.db("#{DB_BASE_NAME}_#{site_number}")
  coll_bulletin = db["bulletins"]
  coll_cat = db["bulletin_categories"]

  categories = coll_cat.find.to_a.each_with_object({}) do |category, by_key|
    by_key[category['key']] = category['_id']
  end
  [categories, coll_bulletin, coll_cat]
end
 | 
						|
 | 
						|
# Load the category caches and collections of the main site ("0", General
# Affairs Office) and of site "11"; both receive copies of other sites' items.
@main_categories, @main_coll_bulletin, @main_coll_cat = get_mongo_and_categories
@copy_categories, @copy_coll_bulletin, @copy_coll_cat = get_mongo_and_categories('11')

# Site number => [categories, bulletin collection, category collection] for
# the two copy targets. An author that lives on one of these sites uses the
# same category Hash, so a category created while importing is visible when
# copying later (get_category_id adds new entries to the Hash it is given).
mirrors = {
  "0" => [@main_categories, @main_coll_bulletin, @main_coll_cat],
  "11" => [@copy_categories, @copy_coll_bulletin, @copy_coll_cat]
}

all.each do |key, value| # Loop through all the authors
  site_number = SITES[key]
  # Reuse the already loaded state for sites "0" and "11"; load it otherwise.
  categories, coll_bulletin, coll_cat = mirrors[site_number] || get_mongo_and_categories(site_number)

  value.each_value do |bul| # Loop through all the items
    category_id, categories = get_category_id(bul[:category], categories, coll_cat)
    next if coll_bulletin.find_one(rss_link: bul[:link]) # already imported

    bulletin = {
      _type: "Bulletin",
      postdate: bul[:date],
      created_at: bul[:date],
      updated_at: bul[:date],
      is_checked: true,
      is_pending: false,
      is_rejected: false,
      bulletin_category_id: category_id,
      title: { :zh_tw => bul[:title] },
      text: { :zh_tw => bul[:description] },
      available_for_zh_tw: true,
      rss_link: bul[:link],
      is_top: false,
      is_hot: false,
      is_hidden: false
    }
    coll_bulletin.save(bulletin)

    # Items that originate on the main site are not copied anywhere.
    next if site_number.eql?("0")

    mirrors.each do |mirror_number, (mirror_categories, mirror_bulletins, mirror_cats)|
      # Never copy an item back into the site it was just saved to, and check
      # each target on its own so a re-run does not create duplicates.
      next if mirror_number == site_number
      next if mirror_bulletins.find_one(rss_link: bul[:link])

      mirror_category_id, = get_category_id(bul[:category], mirror_categories, mirror_cats)

      # NOTE(review): save above may have stored a generated id on `bulletin`
      # (as :_id or '_id', depending on the driver) - drop either form so the
      # fresh id below is the only one on the copy.
      copy = bulletin.reject { |field, _| field.to_s == '_id' }
      copy['_id'] = BSON::ObjectId.new
      copy[:bulletin_category_id] = mirror_category_id
      mirror_bulletins.save(copy)
    end
  end
end
 | 
						|
 | 
						|
 | 
						|
 |