aboutsummaryrefslogtreecommitdiff
blob: be81ef04e1ebe31a283c61777aad4e0b88138e65 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
# R overlay -- util, file read operations
# -*- coding: utf-8 -*-
# Copyright (C) 2012-2014 André Erdmann <dywi@mailerd.de>
# Distributed under the terms of the GNU General Public License;
# either version 2 of the License, or (at your option) any later version.

import mimetypes
import sys
import os.path
import shutil
import errno

import roverlay.util.common
import roverlay.util.objects
import roverlay.util.compression
import roverlay.strutil
from roverlay.strutil import bytes_try_decode
from roverlay.util.compression import \
   COMP_XZ, COMP_BZIP2, COMP_GZIP, LZMAError, \
   get_compress_open, check_compression_supported


# module-wide mimetypes database
# guess_filetype ( path ) returns a 2-tuple ( type, encoding ) that is
# derived from the file name only, the file's content is not inspected.
# read_text_file() uses the second element to select a decompressor.
_MIME = mimetypes.MimeTypes()
guess_filetype = _MIME.guess_type


def strip_newline ( s ):
   """Returns s with all trailing newline ('\\n') chars removed.

   arguments:
   * s -- str
   """
   without_eol = s.rstrip ( '\n' )
   return without_eol
# --- end of strip_newline (...) ---

def read_compressed_file_handle ( CH, preparse=None ):
   """Generator that reads all lines from an already opened (compressed)
   file handle, passes each one through bytes_try_decode() and yields the
   result, optionally preparsed.

   arguments:
   * CH       -- file handle, has to provide readlines()
   * preparse -- None: yield decoded lines as-is
                 True: remove trailing newline chars from each line
                 else: function that gets called for each decoded line,
                       its return value is yielded
   """
   # pick the per-line conversion once, then loop over the lines once
   if preparse is None:
      convert = bytes_try_decode
   elif preparse is True:
      def convert ( raw ):
         return strip_newline ( bytes_try_decode ( raw ) )
   else:
      def convert ( raw ):
         return preparse ( bytes_try_decode ( raw ) )

   for raw_line in CH.readlines():
      yield convert ( raw_line )
# --- end of read_compressed_file_handle (...) ---

def read_text_file ( filepath, preparse=None, try_harder=True ):
   """Generator that reads a compressed/uncompressed file and yields text
   lines. Optionally preparses the text lines.

   The compression format is detected by file name first. If that fails
   and try_harder is set, the file is probed with each known decompressor
   (bzip2, xz, gzip). A file that cannot be read as compressed file is read
   as plain text file.

   arguments:
   * filepath   -- file to read
   * preparse   -- None: yield lines as-is
                   True: remove trailing newline chars from each line
                   else: function for (pre-)parsing lines
   * try_harder -- try known compression formats if file extension cannot
                   be detected (defaults to True)

   Raises: IOError if the file cannot be opened/read (errno is set)
   """

   ftype         = guess_filetype ( filepath )
   compress_open = get_compress_open ( ftype[1], None )

   if compress_open is not None:
      with compress_open ( filepath, mode='r' ) as CH:
         for line in read_compressed_file_handle ( CH, preparse ):
            yield line

      return

   elif try_harder:
      # guess_filetype detects file extensions only
      #
      #  try known compression formats
      #
      for comp in ( COMP_BZIP2, COMP_XZ, COMP_GZIP ):
         CH = None
         copen = get_compress_open ( comp, None )
         if copen is not None:
            try:
               CH      = copen ( filepath, mode='r' )
               creader = read_compressed_file_handle ( CH, preparse )
               # safely read first line only
               line    = next ( creader )

            except ( StopIteration, EOFError ):
               # empty file (?)
               if CH: CH.close()
               CH = None
               # *** FIXME: workaround ***
               # retry as normal file,
               #  EOFError may be caused by small plain text files, too
               # COULDFIX: ?empty compressed files?
               #return

            except IOError as ioerr:
               # failed to open (gzip, bzip2)
               if CH: CH.close()
               CH = None
               # errno is set for "real" I/O errors (e.g. file does not
               # exist), but not for "wrong file format" errors
               if ioerr.errno is not None:
                  raise

            except LZMAError:
               # failed to open (xz)
               if CH: CH.close()
               CH = None

            except:
               if CH: CH.close()
               raise

            else:
               # format detected
               # the try-block has consumed the first line already,
               # it has to be yielded before the remaining lines
               try:
                  yield line
                  for line in creader:
                     yield line
               finally:
                  # also close CH if the generator gets closed early
                  CH.close()
               return
            # -- end try
         # -- end if
      # -- end for <comp>

   # -- end if <try to read filepath as compressed file>

   # file doesn't seem to be compressed (or not supported)
   with open ( filepath, 'rt' ) as FH:
      if preparse is None:
         for line in FH.readlines():
            yield line
      elif preparse is True:
         for line in FH.readlines():
            yield strip_newline ( line )
      else:
         for line in FH.readlines():
            yield preparse ( line )
# --- end of read_text_file (...) ---

def write_text_file (
   filepath, lines, compression=None, mode='wt',
   append_newlines=True, append_newline_eof=False, create_dir=True,
   newline='\n'
):
   """Writes text lines to a file, optionally compressed.

   arguments:
   * filepath           -- file to write
   * lines              -- iterable of lines (each item gets converted
                           with str()), or a single str
   * compression        -- compression format passed to
                           get_compress_open(), no compression if empty
                           or None (default)
   * mode               -- file open mode, defaults to 'wt'
                           (trailing 't'/'u' chars get removed when writing
                           a compressed file)
   * append_newlines    -- write newline after each line (default: True)
   * append_newline_eof -- write an additional newline at the end of the
                           file (default: False)
   * create_dir         -- create the file's directory via
                           dodir_for_file() (default: True)
   * newline            -- newline str, defaults to '\\n'

   Returns: True
   """
   if compression:
      opener = get_compress_open ( compression )
   else:
      opener = None

   if isinstance ( lines, str ):
      # a single str is a line, not an iterable of chars
      text_items = ( lines, )
   else:
      text_items = lines

   if create_dir:
      roverlay.util.common.dodir_for_file ( filepath )

   if opener:
      # compressed file handles get bytes
      eol     = newline.encode()
      convert = lambda item: str ( item ).encode()
      stream  = opener ( filepath, mode.rstrip ( 'tu' ) )
   else:
      eol     = newline
      convert = str
      stream  = open ( filepath, mode )

   with stream as out:
      for item in text_items:
         out.write ( convert ( item ) )
         if append_newlines:
            out.write ( eol )

      if append_newline_eof:
         out.write ( eol )

   return True
# --- end of write_text_file (...) ---


class TextFile ( roverlay.util.objects.PersistentContent ):
   """Base class for line-oriented text files that can be read from and
   written to disk, optionally compressed.

   Reading feeds each line to parse_header_line()/parse_line(), writing
   stores the lines created by gen_lines().
   parse_line() and gen_lines() are declared abstract.
   """

   # passed as preparse/try_harder to read_text_file() when reading
   READ_PREPARSE   = True
   READ_TRY_HARDER = True

   @classmethod
   def get_default_compression ( cls ):
      """Returns the compression that gets used if 'default' is requested
      in set_compression() (here: None).
      """
      return None
   # --- end of get_default_compression (...) ---

   @classmethod
   def check_compression_supported ( cls, compression ):
      """Returns whether the given compression format is supported
      (as reported by the module-level check_compression_supported()).

      arguments:
      * compression --
      """
      return check_compression_supported ( compression )
   # --- end of check_compression_supported (...) ---

   def __init__ ( self, filepath, compression=None ):
      """TextFile constructor.

      arguments:
      * filepath    -- path to the file
      * compression -- compression format, see set_compression()
                       Defaults to None.
      """
      super ( TextFile, self ).__init__()

      self._filepath    = None
      self._compression = None

      # reading state, (re)set by read()
      self.first_line   = None
      self.lino         = None

      self.set_filepath ( filepath )
      self.set_compression ( compression )
   # --- end of __init__ (...) ---

   @roverlay.util.objects.abstractmethod
   def parse_line ( self, line ):
      """Parses a text line. Has to be implemented by derived classes.

      arguments:
      * line --
      """
      return True
   # --- end of parse_line (...) ---

   def parse_header_line ( self, line ):
      """Parses the first line of the file.
      Calls parse_line() by default.

      arguments:
      * line --
      """
      return self.parse_line ( line )
   # --- end of parse_header_line (...) ---

   @roverlay.util.objects.abstractmethod
   def gen_lines ( self ):
      """Creates the text lines that should be written to the file.
      Has to be implemented by derived classes.
      """
      #yield ...
      return
   # --- end of gen_lines (...) ---

   def start_reading ( self ):
      """Hook that gets called by read() before reading any line."""
      pass
   # --- end of start_reading (...) ---

   def done_reading ( self ):
      """Hook that gets called by read() after reading all lines."""
      pass
   # --- end of done_reading (...) ---

   def set_filepath ( self, filepath ):
      """Sets the file's path.

      arguments:
      * filepath --
      """
      self._filepath = filepath
   # --- end of set_filepath (...) ---

   def set_compression ( self, compression ):
      """Sets the compression format used when writing the file.

      arguments:
      * compression -- empty/None/'none': no compression
                       'default': use get_default_compression()
                       else: name of a supported compression format

      Raises: ValueError if compression is not supported
      """
      if not compression or compression == 'none':
         self._compression = None
         return

      if compression == 'default':
         default_comp = self.get_default_compression()
         # sanity check only, gets skipped when running with -O
         assert self.check_compression_supported ( default_comp )
         self._compression = default_comp
         return

      if not self.check_compression_supported ( compression ):
         raise ValueError (
            "unknown file compression {!r}".format ( compression )
         )

      self._compression = compression
   # --- end of set_compression (...) ---

   def backup_file ( self, destfile=None, move=False, ignore_missing=False ):
      """Creates a backup copy of the file.

      Returns True if a backup has been created, else False
      (file does not exist and ignore_missing is set).

      arguments:
      * destfile       -- backup file path
                          Defaults to <file path> + '.bak'.
      * move           -- move the file (instead of copying)
      * ignore_missing -- return False if file does not exist instead of
                          raising an exception. Defaults to False.
      """
      backup_path = destfile if destfile else ( self._filepath + '.bak' )
      transfer    = shutil.move if move else shutil.copyfile

      try:
         roverlay.util.dodir ( os.path.dirname ( backup_path ), mkdir_p=True )
         transfer ( self._filepath, backup_path )
      except IOError as ioerr:
         if not ( ignore_missing and ioerr.errno == errno.ENOENT ):
            raise
         return False

      return True
   # --- end of backup_file (...) ---

   def backup_and_write ( self,
      destfile=None, backup_file=None,
      force=False, move=False, ignore_missing=True
   ):
      """Creates a backup copy of the file and writes the (modified)
      content afterwards. Does nothing and returns True if the content has
      not been modified, unless force is set.

      arguments:
      * destfile       -- file path to be written (defaults to self._filepath)
      * backup_file    -- backup file path (see backup_file())
      * force          -- enforce writing even if file content not modified
      * move           -- move the file (see backup_file())
      * ignore_missing -- do not fail if the file does not exist when
                          creating a backup copy.
                          Defaults to True.
      """
      if not ( force or self.dirty ):
         return True

      self.backup_file (
         destfile=backup_file, move=move, ignore_missing=ignore_missing
      )
      return self.write ( filepath=destfile, force=True )
   # --- end of backup_and_write (...) ---

   def file_exists ( self ):
      """Returns True if the file exists, else False."""
      return os.path.isfile ( self._filepath )
   # --- end of file_exists (...) ---

   def try_read ( self, *args, **kwargs ):
      """Tries to read the file.

      Returns True if the file has been read and False if it does not
      exist. Any other IOError is passed to the caller.

      arguments:
      * *args, **kwargs -- passed to read()
      """
      try:
         self.read ( *args, **kwargs )
      except IOError as ioerr:
         if ioerr.errno != errno.ENOENT:
            raise
         return False

      return True
   # --- end of try_read (...) ---

   def read ( self, filepath=None ):
      """Reads the file.

      The first line is passed to parse_header_line(), all other lines
      to parse_line(). self.lino holds the (0-based) index of the line
      being parsed.

      arguments:
      * filepath -- path to the file to be read (defaults to self._filepath)
                    The content is marked as dirty if a file path is given.
      """
      if filepath is None:
         src_path = self._filepath
      else:
         src_path = filepath

      self.start_reading()

      self.first_line = True
      self.lino       = 0

      line_source = read_text_file (
         src_path,
         preparse=self.READ_PREPARSE, try_harder=self.READ_TRY_HARDER
      )
      for index, text in enumerate ( line_source ):
         self.lino = index
         if not self.first_line:
            self.parse_line ( text )
         else:
            self.first_line = False
            # parse_header_line() can reset first_line to True
            self.parse_header_line ( text )

      if filepath is not None:
         self.set_dirty()

      self.done_reading()
   # --- end of read (...) ---

   def write ( self, filepath=None, force=False ):
      """Writes the file.

      Returns True if the file has been written, else False.

      arguments:
      * filepath -- path to the file to be written (defaults to self._filepath)
                    The dirty state is kept if a file path is given.
      * force    -- enforce writing even if file content not modified
      """
      if not ( force or self.dirty or filepath is not None ):
         return False

      if filepath is None:
         dest_path = self._filepath
      else:
         dest_path = filepath

      write_text_file (
         dest_path, self.gen_lines(),
         compression=self._compression, create_dir=True
      )

      if filepath is None:
         self.reset_dirty()
      # else keep

      return True
   # --- end of write (...) ---

# --- end of TextFile ---