#! /usr/local/bin/ruby

##################################################################
####  RSS2NNTP module, to convert RSS articles to NNTP articles
####
####  Written by F. Senault (fred@lacave.net), free to use,
####  redistribute, modify.
####  No warranty at all, implied or whatever.  Use at your own
####  risk.
##################################################################

# rubygems is optional: only a failed load is ignored, so signals and
# exit requests raised while loading are no longer swallowed.
begin
  require 'rubygems'
rescue LoadError
end

require 'feed_tools'
require 'uri'
require 'open3'
require 'socket'
require 'tempfile'
require 'iconv'
require 'html/htmltokenizer'
require 'htmlentities'

# Globals not referenced anywhere else in the visible code; kept as-is
# because other files may read them.
$PATH = 'innxmit-lacave!rss-nntp-lacave'
$ROOTGROUP = 'lacave.rss.'

# Configuration holder: general settings (symbol keys) plus a map of
# feed URL => newsgroup name.
#
# Configuration file format, as implemented by the parser below:
#   * "#" starts a comment that runs to the end of the line;
#   * a line starting with whitespace continues the previous entry;
#   * "KEY = value" (KEY made of A-Z and 0-9) sets the setting :key;
#   * "url" alone registers a feed with an empty group;
#   * "url group" registers a feed with the given group (see #setgroup).
class RSS2NNTPConf
  # Name of the configuration file looked up next to the running script.
  DEFAULT_FILE = 'rss2nntp.conf'

  # Settings that are converted to booleans when read as strings.
  BOOLEAN_KEYS = [:usesig, :debug, :verbose, :post]

  # Loads the configuration.
  #
  # f - path of the configuration file; when nil, DEFAULT_FILE in the
  #     directory of the running script ($0) is used.
  #
  # Errors raised by File.open (e.g. missing file) are not rescued here.
  def initialize(f = nil)
    @conf = {
      :path      => 'rss2nntp',
      :hierarchy => 'nonexistent.test.',
      :converter => '/usr/local/bin/lynx -dump -force_html ' +
                    '-assume_charset iso-8859-15 -nolist ',
      :host      => Socket.gethostname,
      :server    => 'localhost',
      :abuse     => 'abuse@' + Socket.gethostname.gsub(/^[^.]*\./, ''),
      :program   => 'Lacave.net RSS2NNTP aggregator',
      :version   => '0.5.3',
      :usesig    => true,
      :debug     => false,
      :verbose   => false,
      :post      => true,
    }
    @feeds = {}

    # Built from the script's directory without touching $0 itself
    # (the previous in-place gsub! rewrote the program name, and produced
    # a bogus path when $0 contained no directory part).
    f = File.join(File.dirname($0), DEFAULT_FILE) if f.nil?

    File.open(f) do |h|
      pending = ''
      loop do
        line = h.gets
        unless line.nil?
          line.chomp!
          line.gsub!(/#.*$/, '')
          if line =~ /^\s/
            # Continuation line: glue it to the entry being accumulated.
            pending << line.gsub(/^\s+/, ' ')
            next
          end
        end
        # A new entry starts (or EOF was reached): the accumulated one
        # is complete and can be interpreted.
        parse_entry(pending) unless pending == ''
        break if line.nil?
        pending = line
      end
    end

    BOOLEAN_KEYS.each do |k|
      value = @conf[k]
      next unless value.is_a?(String)
      @conf[k] = %w(yes true).include?(value.downcase)
    end
  end

  # Sets the setting +key+ to +val+.
  def []=(key, val)
    @conf[key] = val
  end

  # Returns the setting stored under +key+ (nil when unset).
  def [](key)
    @conf[key]
  end

  # Registers the newsgroup of the feed +url+.
  #
  # A group starting with "/" is stored verbatim minus that slash; any
  # other non-empty group is prefixed with the :hierarchy setting; an
  # empty group is stored as ''.
  def setgroup(url, group)
    @feeds[url] =
      if group == ''
        ''
      elsif group[0..0] == '/'
        group[1..-1]
      else
        @conf[:hierarchy] + group
      end
  end

  # Returns the group registered for +url+ (nil when the feed is unknown).
  def feed(url)
    @feeds[url]
  end

  # Returns the whole url => group hash.
  def feeds
    @feeds
  end

  private

  # Interprets one logical configuration entry (continuations joined).
  def parse_entry(entry)
    entry = entry.strip
    if entry =~ /^([A-Z0-9]+)\s*=\s*(.+)$/
      key = $1.downcase.to_sym
      val = $2
      @conf[key] = val
    elsif entry =~ /^([^\s]+)$/
      setgroup($1, '')
    elsif entry =~ /^(.+?)\s+(.+)$/
      url = $1
      group = $2
      setgroup(url, group)
    end
  end
end

# NNTP article built from one feed item.  The body is the item content
# run through the configured external converter, followed by a numbered
# list of the links and images found in the content.
class RSSArticle < Net::NNTP::Article
  # Shared configuration (see .loadconfig) and shared charset converter.
  @@conf = nil
  @@conv = nil

  # Builds the article headers and body.
  #
  # item - feed item; the code reads title, content, link, guid, author,
  #        published, time and categories from it.
  # feed - feed the item belongs to; title and link are read.
  # url  - feed URL, used to look up the destination group.
  def initialize(item, feed, url)
    super()
    @@conf = RSS2NNTPConf.new if @@conf.nil?
    @@conv = Iconv.new('ISO-8859-15//TRANSLIT', 'UTF-8') if @@conv.nil?

    @tags = []
    @body = parse_body(item, feed)

    g = @@conf.feed(url)
    if g == ''
      # Feed registered without a group: derive one from the host name
      # and remember it for the following items.
      g = RSSArticle.make_group(url)
      @@conf.setgroup(url, g)
    end

    self['Path'] = @@conf[:path]
    self['Subject'] =
      if item.title.nil? || item.title == ''
        'Pas de sujet'
      else
        @@conv.iconv(HTMLEntities.decode_entities(item.title))
      end
    self['From'] =
      @@conv.iconv(HTMLEntities.decode_entities(make_from(item, feed)))
    self['Newsgroups'] = g
    self['Message-ID'] = RSSArticle.make_mid(item, g)
    self['Date'] = make_rss_date(item)
    self['NNTP-Posting-Host'] = @@conf[:host]
    self['NNTP-Posting-Date'] = RSSArticle.make_date(Time.now)
    self['Lines'] = @body.scan(/\n/).length
    self['Mime-Version'] = '1.0'
    self['Content-Type'] = 'text/plain; charset="iso-8859-15"'
    self['Content-Transfer-Encoding'] = '8bit'
    self['X-Trace'] = "#{@@conf[:host]} #{Time.now.to_i} " +
                      "#{Process.pid} 127.0.0.1 (#{Time.now.to_s})"
    self['X-Complaints-To'] = @@conf[:abuse]
    self['User-Agent'] = "#{@@conf[:program]} #{@@conf[:version]}"
    self['Keywords'] = make_keywords(item.categories)
  end

  # Installs +conf+ as the configuration shared by all articles.
  def self.loadconfig(conf)
    @@conf = conf
  end

  # Derives a group name from the host of +url+: labels are reversed,
  # the first one (the TLD) is dropped, a following "co"/"uc" label is
  # dropped too, and a trailing "www" is removed.
  # Assumes +url+ has a host component.
  def self.make_group(url)
    labels = URI.parse(url).host.split(/\./).reverse
    labels.shift
    labels.shift if labels[0] == 'co' || labels[0] == 'uc'
    labels.pop if labels[-1] == 'www'
    labels.join('.')
  end

  # Builds the Message-ID "<id.1@group>" of +item+.
  #
  # The id comes from the guid (or the link when there is no guid); when
  # that yields nothing, the publication date, the item time and finally
  # the title are tried in turn.
  #
  # The item's own strings are never modified: the id is built on a copy
  # (the previous code ran gsub! directly on item.guid / item.link /
  # item.title), and nil values are treated as empty instead of raising.
  #
  # NOTE(review): the path clean-up regexes below use unescaped dots
  # (/^./ and /.$/ remove the first and last character whatever they
  # are) and only "http://" guids are parsed as URLs.  They are kept
  # exactly as they were because changing them would change the ids
  # generated for the same items.
  def self.make_mid(item, group)
    guid = (item.guid.nil? ? item.link : item.guid).to_s

    if guid =~ /^http:\/\//
      u = URI.parse(guid)
      l = u.path.gsub(/\//, '.')
      l.gsub!(/^./, '')
      l.gsub!(/.$/, '')
      l.gsub!(/index.[a-z]{2,6}\./, '')
      if !u.query.nil? && u.query != ''
        l += (l == '' ? '' : '.') + "#{u.query}"
      end
      if !u.fragment.nil? && u.fragment != ''
        l += (l == '' ? '' : '.') + "#{u.fragment}"
      end
      mid = "#{l}"
    else
      mid = guid.dup
      mid.gsub!(/^tag:([^,])+,/, '')
    end

    mid = item.published.to_s if mid == '' && !item.published.nil?
    mid = item.time.to_s if mid == '' && !item.time.nil?
    mid = item.title.to_s.dup if mid == ''

    mid.gsub!(/[:;,=+]/, '.')
    mid.gsub!(/[^a-zA-Z0-9._-]/, '')
    "<#{mid}.1@#{group}>"
  end

  # Produces the article body: the item content is converted to the
  # target charset, link-marked (see #pre_parse_links), written to a
  # temporary file and passed to the configured converter command; the
  # converter output then gets its link markers resolved, and the
  # optional signature separator and the origin lines are appended.
  def parse_body(item, feed)
    text = ''
    tf = Tempfile.new('rss2news')
    begin
      tf.puts pre_parse_links(@@conv.iconv(item.content))
      tf.close
      Open3.popen3(@@conf[:converter] + " #{tf.path}") do |stdin, stdout, _stderr|
        # The converter reads the temporary file; nothing is sent on
        # its standard input.
        stdin.close_write
        while (line = stdout.gets)
          line.gsub!(/^ /, '')
          text << line
        end
      end
    ensure
      # Remove the temporary file even when the conversion fails.
      tf.close(true)
    end

    text = post_parse_links(text)
    text << "\n" unless text[-1, 1] == "\n"
    text << "-- \n" if @@conf[:usesig]
    text << "Original : #{item.link}\nSite : #{feed.link}\n"
    text
  end

  # Rewrites the HTML +content+ so that links and images survive the
  # text conversion: each </a> or <img> is replaced by a "&-N-&" marker
  # (N being the 1-based position of the target in @tags, or "lien" for
  # a link whose href was not recorded), preceded by "[alt]" when the
  # tag has a non-empty alt attribute.  Link text identical to the link
  # target is dropped.  Returns the rewritten string.
  def pre_parse_links(content)
    r = ''
    lok = -1   # index (1-based) in @tags of the link being read, or -1
    acc = ''   # text accumulated inside the current recorded link
    tk = HTMLTokenizer.new(content)

    while (t = tk.getNextToken)
      if t.is_a?(HTMLTag) && t.tag_name == 'a'
        href = t.attr_hash['href']
        if href && href =~ /^(http|mailto|ftp|news|nntp)/
          @tags.push(href)
          lok = @tags.length
        else
          lok = -1
        end
      elsif t.is_a?(HTMLTag) && (t.tag_name == '/a' || t.tag_name == 'img')
        if acc != ''
          r << acc if lok == -1 || acc.strip != @tags[lok - 1].strip
          acc = ''
        end
        alt = t.attr_hash['alt']
        if alt && alt != ''
          r << ' ' unless r[-1, 1] == ' '
          r << '[' + alt + ']'
        end
        if t.tag_name == 'img'
          # to_s: an <img> without src must not store nil, which used to
          # make the footer generation fail.
          @tags.push(t.attr_hash['src'].to_s)
          r << '&-' + @tags.length.to_s
        else
          r << (lok != -1 ? '&-' + lok.to_s : '&-lien')
          lok = -1
        end
        r << '-&'
      elsif lok == -1
        r << t.to_s
      else
        acc << t.to_s
      end
    end

    # Text buffered inside an <a> that was never closed used to be lost.
    r << acc unless acc == ''
    r
  end

  # Resolves the "&-...-&" markers left in the converted text into
  # "[...]" references and appends the numbered list of targets held in
  # @tags.  Returns +content+ untouched when no target was recorded.
  def post_parse_links(content)
    return content if @tags.length == 0

    r = content.gsub(/( +)?&-((?:lien|img|\d+)(?::.*?)?)-&([ ()'`´"])?/) do |m|
      e = ''
      e << ($1.nil? ? ' ' : $1.squeeze) unless($`[-1,1] == "\n" || $`.length == 0)
      if($2[0..2] == 'img' && $2.length > 3) then
        e << "[#{$2[3..-1]}]"
      elsif($2[0..2] == 'img') then
        e << "[image]"
      else
        e << "[#{$2}]"
      end
      e << ($3.nil? ? ' ' : $3)
    end
    r << "\n" unless r =~ /\n\n$/

    # Numbers are right-aligned on the digit count of the last index.
    f = "%#{@tags.length.to_s.length}d"
    @tags.each_with_index do |tag, i|
      # The targets come from raw HTML attributes, so the ampersand and
      # apostrophe entities are turned back into plain characters.
      # NOTE(review): the previous substitutions replaced "&" with "&"
      # and "'" with "'" (no-ops); entity unescaping is the presumed
      # intent -- confirm.
      url = tag.to_s.gsub(/&amp;/, '&').gsub(/&(?:#39|apos);/, "'")
      r << "[#{f % [i + 1]}] : #{url}\n"
    end
    r
  end

  # Returns the sender name: the item author's name when present,
  # otherwise the feed title, otherwise "Unknown".
  def make_from(item, feed)
    author = item.author
    unless author.nil? || author.name.nil? || author.name == ''
      return author.name
    end
    feed.title.nil? ? "Unknown" : feed.title
  end

  # Returns the categories +c+ as a comma-separated string, using each
  # category's label when it has one and its term otherwise ('' for nil).
  def make_keywords(c)
    return '' if c.nil?
    c.map { |category| category.label.nil? ? category.term : category.label }.join(',')
  end

  # Returns the formatted article date: publication date, else item
  # time, else the current time.
  def make_rss_date(item)
    d = item.published
    d = item.time if d.nil?
    d = Time.now if d.nil?
    Net::NNTP::Article.make_date(d)
  end
end