#!/usr/bin/env ruby
#
# $Id: grep.rb,v 1.33.1 2003/12/06 23:58:23 matz Exp $
#
# Copyright (C) 2003 Minero Aoki
#
# This program is free software.
# You can distribute/modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.
#
# Usage:
#
# 1. Put this file into tDiary's CGI directory and make it executable.
#    e.g.
#
#      $ ls -F
#      grep.rb*
#      index.rb*
#      tdiary.conf
#      update.rb*
#
# 2. Add the following HTML form to your tDiary header:
#
#
#

#   % grep '' *.td2

#
# # If you want to use tDiary's theme for search result page, # use following form instead: # #
#

#   % grep '' *.td2

# #
# # 3. Done. If something goes wrong, refer the error log of your httpd, # or, consult the source code (this file). # BEGIN { $defout.binmode } require 'tdiary' # # Parameters # # TDIARY_DATA_PATH: tDiary's @data_path. # nil: Gets data directory from tdiary.conf automatically. # String: Use it as the path of data directory. TDIARY_DATA_PATH = nil # USE_THEME: Weather we use theme or not. # nil: honour "theme" CGI parameter. (value = on|off) # true: always use theme. # false: never use theme. USE_THEME = true # THEME: tDiary's @theme. Don't forget passing "theme=on" CGI parameter. # nil: honour @theme setting in tdiary.conf. # String: use it. THEME = nil # ALT_THEMES: tDiary's @alt_themes. # [String]: use it. e.g. ALT_THEMES = %w( clover arrow ) ALT_THEMES = [] # CGI_URL: tDiary's index.rb URL # String: URL CGI_URL = './' # LOGGING: wheather we should log query. # true: enable logging. # false: disable logging. LOGGING = true # DEBUG: Debug level. # true: Prints precise message on error. # false: Prints simplified message on error. DEBUG = $DEBUG # # HTML Templates # def unindent(str, n) str.gsub(/^ {0,#{n}}/, '') end def navi @plugin.instance_eval{apply_plugin("<%= navi %>")} end HEADER_NOCSS = unindent(<<-'EOS', 2) tDiary grep <%= navi %> EOS HEADER_CSS = unindent(<<-'EOS', 2) <% ALT_THEMES.each do |alt| %> <% end %> tDiary grep <%= navi %> EOS FOOTER = unindent(<<'EOS', 2) EOS SEARCH_FORM = unindent(<<"EOS", 2)
<% if theme %><% end %>
EOS SEARCH_RESULT_CSS = unindent(<<-'EOS', 2) + SEARCH_FORM

<%= @conf.html_title %> - tDiary grep: Result

  % fgrep -i '<%= patterns2html(patterns) %>' *.td2
  
<% toomanyhits = match_topics(patterns) {|date, topic| %>

<%= format_date(date) %>

<%= shorten(date, topic) %>
<% } %> <%= toomanyhits ? '

too many hits, omitted.

' : '' %> EOS SEARCH_RESULT_NOCSS = unindent(<<-'EOS', 2) + SEARCH_FORM
  % fgrep -i '<%= patterns2html(patterns) %>' *.td2
  <%
      toomanyhits = match_topics(patterns) {|date, topic|
  %><%= format_date(date) %>: <%= shorten(date, topic) %>
  <%
      }
  %><%= toomanyhits ? 'too many hits, omitted.' : ''
  %>
EOS SEARCH_ERROR = unindent(<<'EOS', 2) + SEARCH_FORM
  % fgrep -i '<%= escape(query) %>' *.td2
  <%= escape(reason) %>.
  
EOS HISTORY = unindent(<<"EOS", 2)

<%= @conf.html_title %> - tDiary Grep: Search History

#{SEARCH_FORM} EOS # # Load Libraries # require 'cgi' class CGI def valid?(name) self.params[name] and self.params[name][0] end end begin require 'erb' ErbEvaluator = ERB rescue LoadError require 'erb/erbl' ErbEvaluator = ERbLight end # borrowed from tdiary require 'nkf' begin require 'uconv' def Uconv.unknown_unicode_handler(unicode) if unicode == 0xff5e "¡Á" else raise Uconv::Error end end def to_euc(str) return str.gsub(/&#(\d+);/) { Uconv.u8toeuc([$1.to_i].pack('U')) } if /&#\d+;/ === str begin return Uconv.u8toeuc(str) rescue Uconv::Error return NKF::nkf('-e -m0', str) end end rescue LoadError def to_euc(str) NKF::nkf('-e -m0', str) end end # # Main # class TDiaryGrepError < StandardError; end class WrongQuery < TDiaryGrepError; end class InvalidTDiaryFormat < TDiaryGrepError; end class ConfigError < TDiaryGrepError; end Z_SPACE = "\241\241" # zen-kaku space def main $KCODE = 'EUC' @cgi = CGI.new @conf = TDiary::Config::new @conf.options['apply_plugin'] = true @years = {} @plugin = TDiary::Plugin::new( 'conf' => @conf, 'diaries' => {}, 'cgi' => @cgi, 'years' => @years, 'cache_path' => @conf.cache_path || tdiary_data_path() ) @plugin_binding = instance_eval{binding()} htmlout = '

error

' begin query = to_euc([@cgi.params['q']].compact.flatten.join(' ')) theme = decide_theme(@cgi.params['theme']) begin if LOGGING and File.file?(query_log()) and @cgi.valid?('history') html = history_page(theme) elsif @cgi.valid?('q') html = search_result_page(theme, setup_patterns(query)) save_query(query, query_log()) if LOGGING else html = search_form_page(theme) end rescue WrongQuery => err html = search_error_page(theme, query, err.message) end htmlout = html rescue Exception => err html = '' html << HEADER_NOCSS html << "
\n"
    html << 'q=' << escape(query) << "\n"
    html << escape(err.class.name) << "\n" if DEBUG
    html << escape(err.message) << "\n"
    html << err.backtrace.map {|i| escape(i) }.join("\n") if DEBUG
    html << "
\n" html << FOOTER htmlout = html ensure send_html htmlout end exit 0 end def send_html(html) print @cgi.header('status' => '200 OK', 'type' => 'text/html', 'charset' => 'euc-jp', 'Content-Length' => html.length.to_s, 'Cache-Control' => 'no-cache', 'Pragma' => 'no-cache') print html unless @cgi.request_method == 'HEAD' end def decide_theme(theme_param) if USE_THEME == nil begin return nil unless [theme_param].compact.flatten[0].to_s.downcase == 'on' THEME || theme_config() rescue ConfigError # Arbitary users can set `theme' parameter, # we should not raise error here. return nil end else return nil unless USE_THEME THEME || theme_config() end end def setup_patterns(query) patterns = query.jsplit.map {|pat| check_pattern pat /#{Regexp.quote(pat)}/ie } raise WrongQuery, 'no pattern' if patterns.empty? raise WrongQuery, 'too many sub patterns' if patterns.length > 8 patterns end def check_pattern(pat) raise WrongQuery, 'no pattern' unless pat raise WrongQuery, 'empty pattern' if pat.empty? 
raise WrongQuery, "pattern too short: #{pat}" if pat.length < 2 raise WrongQuery, 'pattern too long' if pat.length > 128 end class String def jsplit sub(/\A[\s#{Z_SPACE}]+/oe, '').sub(/[\s#{Z_SPACE}]+\z/oe, '')\ .split(/[\s#{Z_SPACE}]+/oe) end end def save_query(query, file) File.open(file, 'a') {|f| begin f.flock(File::LOCK_EX) f.puts "#{Time.now.to_i}: #{query.dump}" ensure f.flock(File::LOCK_UN) end } end # # eRuby Dispatchers and Helper Routines # def search_form_page(theme) if theme then ErbEvaluator.new(HEADER_CSS + SEARCH_FORM + FOOTER).result(binding()) else ErbEvaluator.new(HEADER_NOCSS + SEARCH_FORM + FOOTER).result(binding()) end end def search_result_page(theme, patterns) if theme then ErbEvaluator.new(HEADER_CSS + SEARCH_RESULT_CSS + FOOTER).result(binding()) else ErbEvaluator.new(HEADER_NOCSS + SEARCH_RESULT_NOCSS + FOOTER).result(binding()) end end def search_error_page(theme, query, reason) if theme then ErbEvaluator.new(HEADER_CSS + SEARCH_ERROR + FOOTER).result(binding()) else ErbEvaluator.new(HEADER_NOCSS + SEARCH_ERROR + FOOTER).result(binding()) end end def history_page(theme) if theme then ErbEvaluator.new(HEADER_CSS + HISTORY + FOOTER).result(binding()) else ErbEvaluator.new(HEADER_NOCSS + HISTORY + FOOTER).result(binding()) end end def query_log "#{tdiary_data_path()}/grep.log" end N_SHOW_QUERY_MAX = 20 def recent_queries return unless File.file?(query_log()) File.readlines(query_log()).reverse[0, N_SHOW_QUERY_MAX].map {|line| time, q = *line.split(/:/, 2) [Time.at(time.to_i), eval(q)] } end def escape_url(u) escape(urlencode(u)) end def tdiary_data_path @tdiary_data_path ||= (TDIARY_DATA_PATH || data_path_config()) end def patterns2html(regs) regs.map {|re| escape(re.source) }.join(' ') end def apply_plugin(date, str) unless defined?(@style_class) style = @conf.style || 'tdiary' require "tdiary/#{style.downcase}_style.rb".untaint @style_class = eval("TDiary::#{style.capitalize}Diary").untaint end str = @style_class::new(date, date, 
str).to_html({}) @plugin.instance_eval{apply_plugin(str, true)} end def shorten(date, topic) title, body = topic.split(/\n/, 2) title = apply_plugin(date, title.gsub(/\A\s+/, '')) body = apply_plugin(date, body) sprintf('%-20s
%s', title.to_s.strip, body.slice(/\A.{0,120}/em) #.gsub(/[\s#{Z_SPACE}]+/oe, ' ') ) end def format_time(time) sprintf('%04d-%02d-%02d %02d:%02d:%02d', time.year, time.month, time.day, time.hour, time.min, time.sec) end def format_date(date) y, m, d = /\A(\d{4})(\d{2})(\d{2})/.match(date).captures "#{y}-#{m}-#{d}" end TOO_MANY_HITS = 50 def match_topics(patterns) hit = 0 match_topics0(patterns) do |date, topics| yield date, topics hit += 1 return true if hit > TOO_MANY_HITS end false end def match_topics0(patterns) foreach_data_file(tdiary_data_path()) do |path| foreach_diary(path.untaint) do |date, topics| topics.each do |topic| yield date, topic if patterns.all? {|re| re =~ topic } end end end end # # tDiary Implementation Dependent # def foreach_data_file(data_path, &block) Dir.glob("#{data_path}/[0-9]*/*.td2").sort.reverse_each(&block) end def foreach_diary(path) table = {} File.open(path) {|f| f.each('') do |headers| date = headers.slice(/^Date:\s*(\d{4}\d{2}\d{2})/, 1) or raise "unexpected tdiary format: Date=nil:\n#{headers.strip}" format = headers.slice(/^Format:\s*(\S+)/, 1) or raise "unexpected tdiary format: Format=nil:\n#{headers.strip}" diary = f.gets("\n.\n").chomp(".\n") table[date] = split_topics(diary, format) year = date[0,4] @years[year] ||= [] @years[year] ||= [date[4,2]] end } table.keys.sort.reverse_each do |date| yield date, table[date] end end SPLITTER = { 'tdiary' => /\n\n/, 'rd' => /^=(?!=)/, 'blogrd' => /^=(?!=)/, 'wiki' => /^!/ } def split_topics(diary, format) re = SPLITTER[format.downcase] or raise ArgumentError, "unknown diary format: #{format}" diary.strip.split(re) end def data_path_config tdiary_conf().slice(/^\s*@data_path\s*=\s*(['"])(.*?)\1/, 2) or raise ConfigError, 'cannot get tDiary @data_path' end def theme_config conf = data_path_config()+"/tdiary.conf" if File.exist?(conf) theme = File.read(conf).slice(/^\s*theme\s*=\s*(['"])(.*?)\1/, 2) end unless theme theme = tdiary_conf().slice(/^\s*@theme\s*=\s*(['"])(.*?)\1/, 2) end 
raise ConfigError, 'cannot get tDiary @theme' unless theme raise ConfigError, '@theme set but is empty' if theme.empty? raise ConfigError, "@theme set but not exist: #{theme}" \ unless File.file?("theme/#{theme}/#{theme}.css") theme end def tdiary_conf @tdiary_conf ||= File.read("#{File.dirname(__FILE__)}/tdiary.conf") end # # Utils # ESC = { '&' => '&', '<' => '<', '>' => '>', '"' => '"' } def escape(str) str.gsub(/[&"<>]/) {|s| ESC[s] } end def urlencode(str) str.gsub(/[^\w\-]/n) {|s| sprintf('%%%02x', s[0]) } end def bool(obj) obj ? true : false end # # Old Ruby Compatibility # if RUBY_VERSION < '1.8.0' class String remove_method :slice def slice(re, n = 0) m = re.match(self) or return nil m[n] end end end unless Array.method_defined?(:all?) module Enumerable def all? each do |i| return false unless yield(i) end true end end end unless MatchData.method_defined?(:captures) class MatchData def captures to_a()[1..-1] end end end unless File.respond_to?(:read) def File.read(fname) open(fname) {|f| return f.read } end end main