diff -u /home/rubikitch/compile/org/plucker/parser/python/PyPlucker/Retriever.py /home/rubikitch/src/plucker/parser/python/PyPlucker/Retriever.py
--- /home/rubikitch/compile/org/plucker/parser/python/PyPlucker/Retriever.py	2002-07-10 00:45:03.000000000 +0900
+++ /home/rubikitch/src/plucker/parser/python/PyPlucker/Retriever.py	2003-12-10 00:38:09.000000000 +0900
@@ -37,6 +37,8 @@
 import urllib
 import types
 import sys
+import tempfile
+
 try:
     import gzip
     import StringIO
@@ -263,6 +265,23 @@
                 # Now get the contents
                 contents = webdoc.read ()
 
+                
+                # jp-encode, 2001/10/01 takayosi_sato<takayosi@m-net.ne.jp>
+                #if(headers_dict['content-type'] == 'text/html'):
+                #    toSJIS = kconv.Kconv(kconv. SJIS,kconv.AUTO, kconv.HANKAKU)
+                #    contents = toSJIS.convert(contents)
+                # jp-encode end
+
+                # NKF, [2002/02/26] rubikitch<rubikitch@ruby-lang.org>
+                if(headers_dict['content-type'] == 'text/html' or
+                   headers_dict['content-type'] == 'text/plain'):
+                    temppath = tempfile.mktemp()
+                    tempf = open( temppath, "w" )
+                    tempf.write( contents )
+                    tempf.close()
+                    pipe = os.popen( "nkf -xs %(temppath)s" % vars(), "r" )
+                    contents =  pipe.read()
+                    os.unlink( temppath )
                 # Check if encoded contents...
                 if headers_dict.has_key ('content-encoding'):
                     encoding = headers_dict['content-encoding']
diff -u /home/rubikitch/compile/org/plucker/parser/python/PyPlucker/TextParser.py /home/rubikitch/src/plucker/parser/python/PyPlucker/TextParser.py
--- /home/rubikitch/compile/org/plucker/parser/python/PyPlucker/TextParser.py	2003-08-03 23:07:02.000000000 +0900
+++ /home/rubikitch/src/plucker/parser/python/PyPlucker/TextParser.py	2003-12-10 00:52:00.000000000 +0900
@@ -60,9 +60,9 @@
 
 # the following constant states how big (approximately) one single
 # paragraphs should maximally be
-Max_Paragraph_Size = 1000
+Max_Paragraph_Size = 100
 # how much more to allow in order not to break an anchor
-Max_Paragraph_Size_Anchor_Stretch = 150
+Max_Paragraph_Size_Anchor_Stretch = 50
 
 
 ## The following are used in the parser to clean up things.
@@ -205,7 +205,7 @@
             "h5": 5,
             "h6": 6,
             "tt" : 8,
-            "pre": 8,
+            "pre": 0,
             "small": 9,
             "sub" : 10,
             "sup" : 11,
@@ -830,7 +830,7 @@
 
         text = _clean_newlines (text)
         # This we use to build the document
-        self._doc = TextDocBuilder (url, config, max_paragraph_size=3000)
+        self._doc = TextDocBuilder (url, config, max_paragraph_size=100)
         self._url = url
         self._base = None        # use this if defined for relative URLs
         self._config = config
