? .svn
Index: htmlign
===================================================================
RCS file: /cvs/ruby/samidare/htmlign,v
retrieving revision 1.2
diff -u -r1.2 htmlign
--- htmlign	3 Aug 2003 17:03:19 -0000	1.2
+++ htmlign	8 Sep 2006 17:17:59 -0000
@@ -33,7 +33,8 @@
 ignore_pattern = path2pattern(*ignore_path)
 p ignore_pattern
 
-pred = lambda {|e, path|
+pred = lambda {|e|
+  path = e.path.sub(%r{^doc\(\)}, '')
   p path if ignore_pattern === path
   not (
     #(HTree::Elem === e && (e.tagname == 'style' ||
@@ -42,7 +43,7 @@
   )
 }
 
-tree2 = tree1.filter_with_path(&pred)
+tree2 = tree1.make_loc.filter(&pred)
 
 tf1 = Tempfile.new('htmldiff1')
 PP.pp(tree1, tf1)
Index: main.rb
===================================================================
RCS file: /cvs/ruby/samidare/main.rb,v
retrieving revision 1.52
diff -u -r1.52 main.rb
--- main.rb	14 Apr 2006 15:33:15 -0000	1.52
+++ main.rb	8 Sep 2006 17:17:59 -0000
@@ -38,6 +38,10 @@
   def initialize(hash, config)
     @status = hash
     @config = config
+    if @config['TimeURI']
+      @config['URI'] = Time.now.strftime(@config['TimeURI'])
+      @config['LinkURI'] = @config['URI']
+    end
     related_uris.each {|uri|
       ENTRIES[uri] ||= []
       ENTRIES[uri] << self
@@ -337,6 +341,10 @@
     author = t.author
     log['extractedAuthor'] = author.to_s if author
 
+    if %r!\Ahttp://d\.hatena\.ne\.jp/([^/]+)/rss\z! =~ @config['URI']
+      log['extractedAuthor'] ||= $1
+    end
+
     t.traverse_element('meta', '{http://www.w3.org/1999/xhtml}meta') {|e|
       begin
         next unless e.fetch_attr("http-equiv").downcase == "last-modified"
@@ -359,6 +367,23 @@
           log['extractedLinkURI'] = link_uri.to_s
         end
       end
+      unless author
+        if author = t.find_element('{http://purl.org/rss/1.0/}author',
+                                   '{http://purl.org/dc/elements/1.1/}creator')
+          log['extractedAuthor'] = author.extract_text.to_s.strip
+        end
+      end
+    end
+
+    rexml = t.to_rexml
+    if image = rexml.elements['/rdf:RDF/image/@rdf:about'] ||
+       rexml.elements["/html/head/link[@rel='shortcut icon']/@href"] ||
+       rexml.elements["/html/head/link[@rel='icon']/@href"]
+      base_uri = URI.parse(log['baseURI'] || @config['URI'])
+      image_uri = base_uri + image.to_s
+      #if image_uri.scheme == 'http' && base_uri.host == image_uri.host # xxx: refine safety test
+        log['extractedImageURI'] = image_uri.to_s
+      #end
     end
 
     if root
@@ -908,6 +933,7 @@
         h['title'] ||= l['extractedTitle'] if l['extractedTitle']
         h['author'] ||= l['extractedAuthor'] if l['extractedAuthor']
         h['linkURI'] ||= l['extractedLinkURI'] if l['extractedLinkURI']
+        h['imageURI'] = l['extractedImageURI'] if l['extractedImageURI']
       end
       if StatusMap[logseq.last['status']] == 'e'
         l = logseq.last
@@ -1212,8 +1238,8 @@
       str << '0,'
       str << h['linkURI'].gsub(/[,\\]/) { "\\#$&" } << ','
       str << h['title'].gsub(/[,\\]/) { "\\#$&" }.strip.gsub(/\s/, ' ') << ','
-      str << (h['author'] || '0').gsub(/[,\\]/) { "\\#$&" }.strip.gsub(/\s/, ' ') << ','
-      str << '0,'
+      str << (h['author'] || '(unknown author)').gsub(/[,\\]/) { "\\#$&" }.strip.gsub(/\s/, ' ') << ','
+      str << 'http://www.rubyist.net/~kazu/samidare/,'
       str << "\n"
     }
     str = str.encode_charset('euc-jp')
