adacl-eastrings-io.ads

1-----------------------------------------------------------------------------
2--
3-- Copyright 2004 Björn Persson.
4--
5-- This library is free software; you can redistribute it and/or modify it
6-- under the terms of the GNU General Public License, version 2, as published
7-- by the Free Software Foundation.
8--
9-- As a special exception, if other files instantiate generics from this
10-- unit, or you link this unit with other files to produce an executable,
11-- this unit does not by itself cause the resulting executable to be covered
12-- by the General Public License. This exception does not however invalidate
13-- any other reasons why the executable file might be covered by the General
14-- Public License.
15--
16----------------------------------------------------------------------------
17
18pragma License (Modified_Gpl);
19pragma Ada_2022;
20
21with Ada.Streams.Stream_IO; use Ada.Streams;
22with Ada.IO_Exceptions;
23with AdaCL.EAstrings.OS;
24with Ada.Characters.Latin_1;
25with AdaCL.EAstrings.Latin_1;
26with AdaCL.OS.Low_Level;
27
28package AdaCL.EAstrings.IO is
29 --
30 -- This package provides encoding-aware output to and input from text
31 -- files and text streams. Text streams are byte streams that can be read
32 -- or written sequentially, and where the bytes constitute text encoded in
33 -- some character encoding. They have no defined line length; line breaks
34 -- are represented as characters in the stream. There is no concept of
35 -- pages in this package.
36 --
37 -- Each open stream has an associated character encoding. By default,
38 -- streams are assumed to be encoded in the encoding that OS.OS_Encoding
39 -- reports. Another encoding can be specified when the stream is opened or
40 -- later. How line breaks are represented when writing and the set of line
41 -- breaks that shall be recognized when reading can also be configured.
42 --
43 -- In this package it is assumed that Ada.Streams.Stream_Element equals
44 -- one byte, that is, eight bits.
45 --
46
47 pragma Elaborate_Body;
48
49 ---------------------------------------------------------------------------
50 -- EAstream
51 ---------------------------------------------------------------------------
52
53 type EAstream is abstract tagged limited private;
 -- Abstract root of the encoding-aware stream types. EAfile and
 -- Stream_EAstream are the two extensions declared in this package.
54 type EAstream_Pointer is access all EAstream'Class;
 -- Class-wide access type. This is the type returned by Standard_Input,
 -- Standard_Output and Standard_Error.
55
56 type Read_Stop_Cause is (All_Done, End_Of_File, Incomplete, Invalid);
57 -- Reason why Get or Get_Line returns.
58 -- All_Done : The requested amount of data was read.
59 -- End_Of_File : The end of the file was reached before the requested
60 -- amount of data was read.
61 -- Incomplete : The file ended in an incomplete code sequence.
62 -- Invalid : The file's content wasn't a valid encoded text for the
63 -- file's encoding.
64
65 ---------------------
66 -- File management --
67 ---------------------
68
69 function Is_Open (File : in EAstream) return Boolean is abstract;
70 -- Returns True if the EAstream is open (that is, if it is associated with
71 -- a byte stream), otherwise returns False.
 --
 -- This operation is abstract; EAfile and Stream_EAstream each override it
 -- in the private part of this package.
72
73 function End_Of_File (File : in EAstream) return Boolean;
74 -- Returns True if the end of the file has been found, otherwise returns
75 -- False.
76 --
77 -- End_Of_File can't look ahead. It only returns True if Get or Get_Line
78 -- has already encountered the end of the file. It is quite possible that
79 -- the last character has already been read but End_Of_File returns False,
80 -- because only the next read attempt will fail and report the end of the
81 -- file.
82
83 function Stream
84 (File : in EAstream)
85 return Stream_IO.Stream_Access
86 is abstract;
87 -- Returns a pointer to the underlying raw byte stream that File is
88 -- associated with. Status_Error is propagated if File is not open.
 --
 -- This operation is abstract; EAfile and Stream_EAstream each override it
 -- in the private part of this package.
89
90 ----------------------------
91 -- File format management --
92 ----------------------------
93
94 procedure Set_Encoding
95 (File : in out EAstream;
96 Encoding : in Character_Encoding);
97 -- Sets the character encoding that the byte stream should be interpreted
98 -- as when reading from File, or that the text should be encoded in when
99 -- writing to File. The character encoding can be changed in the middle of
100 -- the file if so desired.
 --
 -- EAfile overrides this operation (see the private part).
101
102 function Encoding (File : in EAstream) return Character_Encoding;
103 -- Returns the character encoding that the byte stream is currently
104 -- interpreted as when reading from File, or that the text is currently
105 -- encoded in when writing to File.
106
107 procedure Set_Line_Break
108 (File : in out EAstream;
109 Line_Break : in EAstring);
110 -- Sets the string to be written for line breaks when Put_Line or New_Line
111 -- is called on File.
 --
 -- The default value of the corresponding record component is
 -- Native_Line_Breaks (This_OS), declared in the private part.
112
113 function Line_Break (File : in EAstream) return EAstring;
114 -- Returns the string that is currently used for line breaks when Put_Line
115 -- or New_Line is called on File.
116
117 procedure Set_Recognized_Line_Breaks
118 (File : in out EAstream;
119 Line_Breaks : in EAstring_Array);
120 -- Sets the set of strings to be recognized as line breaks by Get_Line and
121 -- Skip_Line when reading from File.
 --
 -- Invalid_Line_Break is raised if Line_Breaks contains a string that
 -- isn't one or two characters long.
122
123 function Recognized_Line_Breaks
124 (File : in EAstream)
125 return EAstring_Array;
126 -- Returns the set of strings that are currently recognized as line breaks
127 -- by Get_Line and Skip_Line when reading from File.
128
129 ------------------
130 -- Line control --
131 ------------------
132
133 procedure New_Line (File : in EAstream; Spacing : in Positive := 1);
134 -- Writes the number of line breaks specified by Spacing to File.
 -- The string written for each line break is the one selected with
 -- Set_Line_Break. Spacing defaults to a single line break.
135 -- Mode_Error is propagated if File is an EAfile whose mode is not
136 -- Out_File or Append_File.
137
138 procedure New_Line (Spacing : in Positive := 1);
139 -- Like New_Line (File, Spacing), but writes to Standard_Output.
140
141 procedure Skip_Line
142 (File : in out EAstream;
143 Spacing : in Positive := 1);
144 -- Reads from File and discards all characters until the number of line
145 -- breaks specified by Spacing has been read.
 -- The strings counted as line breaks are those selected with
 -- Set_Recognized_Line_Breaks.
146 --
147 -- Mode_Error is propagated if File is an EAfile whose mode is not
148 -- In_File. End_Error is propagated if an attempt is made to read a file
149 -- terminator. Incomplete_Byte_Sequence is propagated if File ends in an
150 -- incomplete code sequence.
151 --
152 -- Invalid_Byte_Sequence is propagated if File's content isn't a valid
153 -- encoded text for File's encoding.
154
155 procedure Skip_Line (Spacing : in Positive := 1);
156 -- Like Skip_Line (File, Spacing), but reads from Standard_Input.
157
158 -----------------------------
159 -- String input and output --
160 -----------------------------
161
162 procedure Get
163 (File : in out EAstream;
164 Item : out EAstring;
165 Length : in Natural;
166 Exit_Cause : out Read_Stop_Cause);
167 -- Reads the number of characters specified by Length from File and stores
168 -- them in Item. If line breaks are encountered they will be included in
169 -- Item just like other characters.
170 --
171 -- Mode_Error is propagated if File is an EAfile whose mode is not
172 -- In_File. Otherwise, success or failure is reported in Exit_Cause. In
173 -- case of failure, as much as could be read is returned in Item.
 --
 -- Exit_Cause receives one of the Read_Stop_Cause values; see that type
 -- for the meaning of each value.
174
175 procedure Get
176 (File : in out EAstream;
177 Item : out EAstring;
178 Length : in Natural);
179 -- Like the four-parameter Get, but raises exceptions when reading fails:
 -- End_Error is
180 -- raised if an attempt is made to read a file terminator.
181 -- Incomplete_Byte_Sequence is raised if File ends in an incomplete code
182 -- sequence.
183 --
184 -- Invalid_Byte_Sequence is raised if File's content isn't a valid encoded
185 -- text for File's encoding.
186
187 procedure Get (Item : out EAstring; Length : in Natural);
188 -- Like Get (File, Item, Length), but reads from Standard_Input.
189
190 procedure Get_Line
191 (File : in out EAstream;
192 Item : out EAstring;
193 Exit_Cause : out Read_Stop_Cause);
194 -- Reads characters from File and stores them in Item until one of the
195 -- recognized line breaks is encountered. In the next call to Get or
196 -- Get_Line, the first character read will be the first one after the line
197 -- break.
 -- The recognized line breaks are those selected with
 -- Set_Recognized_Line_Breaks.
198 --
199 -- Mode_Error is propagated if File is an EAfile whose mode is not
200 -- In_File. Otherwise, success or failure is reported in Exit_Cause. In
201 -- case of failure, as much as could be read is returned in Item.
202 --
203 -- If the file ends before anything has been read, Exit_Cause is set to
204 -- End_Of_File. If characters have been read, Exit_Cause is set to
205 -- All_Done and End_Of_File is not reported until the next read attempt.
206 -- That is, it is not required that a file ends with a line break.
207
208 procedure Get_Line (File : in out EAstream; Item : out EAstring);
209 -- Like the three-parameter Get_Line, but raises exceptions when reading
 -- fails: End_Error is
210 -- raised if the file ends before anything has been read.
211 -- Incomplete_Byte_Sequence is raised if File ends in an incomplete code
212 -- sequence.
213 --
214 -- Invalid_Byte_Sequence is raised if File's content isn't a valid encoded
215 -- text for File's encoding.
216
217 procedure Get_Line (Item : out EAstring);
218 -- Like Get_Line (File, Item), but reads from Standard_Input.
219
220 procedure Put (File : in EAstream; Item : in EAstring);
221 -- Writes the string in Item to File. Mode_Error is propagated if File is
222 -- an EAfile whose mode is not Out_File or Append_File.
223
224 procedure Put (Item : in EAstring);
225 -- Like Put (File, Item), but writes to Standard_Output.
226
227 procedure Put_Line (File : in EAstream; Item : in EAstring);
228 -- Writes the string in Item to File, followed by a line break. Mode_Error
229 -- is propagated if File is an EAfile whose mode is not Out_File or
230 -- Append_File.
 --
 -- The line break written is the string selected with Set_Line_Break.
231
232 procedure Put_Line (Item : in EAstring);
233 -- Like Put_Line (File, Item), but writes to Standard_Output.
234
235 -----------------------------------------------
236 -- The default input, output and error files --
237 -----------------------------------------------
238
239 function Standard_Input return EAstream_Pointer;
 -- Returns the default input stream. The overloads of Get, Get_Line and
 -- Skip_Line that take no File parameter read from this stream.
240 function Standard_Output return EAstream_Pointer;
 -- Returns the default output stream. The overloads of Put, Put_Line and
 -- New_Line that take no File parameter write to this stream.
241 function Standard_Error return EAstream_Pointer;
 -- Returns the default error stream. No operation in this specification
 -- uses it implicitly; pass Standard_Error.all as the File argument.
242
243 ---------------------------------------------------------------------------
244 -- EAfile
245 ---------------------------------------------------------------------------
246
247 type EAfile is new EAstream with private;
248 -- An EAfile reads from or writes to a named external file.
249
250 subtype File_Mode is Stream_IO.File_Mode;
 -- Same type as Ada.Streams.Stream_IO.File_Mode, re-exported under this
 -- package's name.
251 function In_File return File_Mode renames Stream_IO.In_File;
252 function Out_File return File_Mode renames Stream_IO.Out_File;
253 function Append_File return File_Mode renames Stream_IO.Append_File;
 -- The three renamings make the mode literals nameable as
 -- AdaCL.EAstrings.IO.In_File, .Out_File and .Append_File.
254
255 ---------------------
256 -- File management --
257 ---------------------
258
259 procedure Create
260 (File : in out EAfile;
261 Name : in EAstring := Null_EAstring;
262 Encoding : in Character_Encoding := OS.OS_Encoding;
263 Form : in String := "");
264 -- Creates a new external file, with the given name and form, and
265 -- associates this external file with the given EAfile. The EAfile is left
266 -- open. The character encoding of the EAfile is set to the given encoding
267 -- and its current mode is set to Out_File.
 --
 -- Defaults: Name is Null_EAstring (a temporary file, see below), Encoding
 -- is OS.OS_Encoding and Form is the empty string.
268 --
269 -- A null string for Name specifies an external file that is not
270 -- accessible after the completion of the main program (a temporary file).
271 -- The valid values of Form are the same as for
272 -- Ada.Streams.Stream_IO.Create; a null string specifies the default
273 -- options for the external file.
274 --
275 -- Status_Error is propagated if the EAfile is already open. Name_Error is
276 -- propagated if the string given as Name does not allow the
277 -- identification of an external file. Use_Error is propagated if the
278 -- external environment does not support creation of an external file with
279 -- the given name (in the absence of Name_Error) and form.
280
281 procedure Open
282 (File : in out EAfile;
283 Mode : in File_Mode;
284 Name : in EAstring;
285 Encoding : in Character_Encoding := OS.OS_Encoding;
286 Form : in String := "");
287 -- Associates the given EAfile with an existing external file having the
288 -- given name and form, and sets the current mode and character encoding
289 -- of the EAfile to the given mode and encoding. The EAfile is left open.
 --
 -- Defaults: Encoding is OS.OS_Encoding and Form is the empty string.
 -- Mode and Name have no defaults.
290 --
291 -- The valid values of Form are the same as for
292 -- Ada.Streams.Stream_IO.Open.
293 --
294 -- Status_Error is propagated if the EAfile is already open. Name_Error is
295 -- propagated if the string given as Name does not allow the
296 -- identification of an external file; in particular, this exception is
297 -- propagated if no external file with the given name exists. Use_Error is
298 -- propagated if, for the specified mode, the external environment does
299 -- not support opening for an external file with the given name (in the
300 -- absence of Name_Error) and form.
301
302 procedure Close (File : in out EAfile);
303 -- Severs the association between the given EAfile and its associated
304 -- external file. The EAfile is left closed. If the file being closed has
305 -- mode Out_File or Append_File, then the last character written since the
306 -- most recent open or reset is the last character that can be read from
307 -- the file. If nothing has been written and the mode is Out_File, then
308 -- the closed file is empty. If nothing has been written and the mode is
309 -- Append_File, then the closed file is unchanged.
310 --
311 -- Status_Error is propagated if the EAfile is not open.
312
313 procedure Delete (File : in out EAfile);
314 -- Deletes the external file associated with the given EAfile. The EAfile
315 -- is closed, and the external file ceases to exist.
 -- After a successful Delete the EAfile is no longer open, as after Close.
316 --
317 -- Status_Error is propagated if the EAfile is not open. Use_Error is
318 -- propagated if deletion of the external file is not supported by the
319 -- external environment.
320
321 procedure Reset
322 (File : in out EAfile;
323 Mode : in File_Mode;
324 Encoding : in Character_Encoding);
325 -- Resets the given EAfile so that reading can be restarted from the
326 -- beginning of the file (for the mode In_File), and so that writing can
327 -- be restarted at the beginning of the file (for the mode Out_File) or
328 -- after the last character of the file (for the mode Append_File). The
329 -- current mode and character encoding of the EAfile are set to the given
330 -- mode and encoding. If the EAfile has mode Out_File or Append_File when
331 -- Reset is called, the last character written since the most recent open
332 -- or reset is the last character that can be read from the file. If
333 -- nothing has been written and the mode is Out_File, then the reset file
334 -- is empty. If nothing has been written and the mode is Append_File, then
335 -- the reset file is unchanged.
336 --
337 -- Status_Error is propagated if the EAfile is not open. Use_Error is
338 -- propagated if the external environment does not support resetting for
339 -- the external file and, also, if the external environment does not
340 -- support resetting to the specified mode for the external file.
341
342 procedure Reset (File : in out EAfile; Mode : in File_Mode);
343 -- Like Reset (File, Mode, Encoding), but does not change the character
 -- encoding.
344
345 procedure Reset (File : in out EAfile);
346 -- Like Reset (File, Mode, Encoding), but does not change the character
 -- encoding or the mode.
347
348 function Mode (File : in EAfile) return File_Mode;
349 -- Returns the current mode of the given EAfile. Status_Error is
350 -- propagated if the EAfile is not open.
351
352 function Name (File : in EAfile) return EAstring;
353 -- Returns a string which uniquely identifies the external file currently
354 -- associated with the given EAfile (and may thus be used in an Open
355 -- operation).
 -- The result is an EAstring, matching the Name parameter of Create and
 -- Open.
356 --
357 -- Status_Error is propagated if the EAfile is not open. Use_Error is
358 -- propagated if the associated external file is a temporary file that
359 -- cannot be opened by any name.
360
361 function Form (File : in EAfile) return String;
362 -- Returns the form string for the external file currently associated with
363 -- the given EAfile. Status_Error is propagated if the EAfile is not open.
 -- The result is a plain String, matching the Form parameter of Create and
 -- Open.
364
365 --------------------
366 -- Buffer control --
367 --------------------
368
369 procedure Flush (File : in EAfile);
370 -- Synchronizes the external file with the internal file (by flushing any
371 -- internal buffers) without closing the file or changing the position.
372 -- Mode_Error is propagated if the mode of the EAfile is In_File.
 --
 -- Flush is declared for EAfile only; this specification declares no
 -- Flush for Stream_EAstream or for EAstream in general.
373
374 ---------------------------------------------------------------------------
375 -- Stream_EAstream
376 ---------------------------------------------------------------------------
377
378 type Stream_EAstream is new EAstream with private;
379 -- A Stream_EAstream reads and writes through an object in
380 -- Root_Stream_Type'Class, so that you can wrap an EAstream around
381 -- anything that you can get a Stream_Access value for.
382
383 ---------------------
384 -- File management --
385 ---------------------
386
387 procedure Connect
388 (File : in out Stream_EAstream;
389 Stream : in Stream_IO.Stream_Access;
390 Encoding : in Character_Encoding := OS.OS_Encoding);
391 -- Connects an EAstream object to an open byte stream and sets the
392 -- character encoding to be used when reading from or writing to the
393 -- stream.
 --
 -- Encoding defaults to OS.OS_Encoding. This specification declares no
 -- matching disconnect or close operation for Stream_EAstream.
394
395 ---------------------------------------------------------------------------
396 -- Exceptions
397 ---------------------------------------------------------------------------
398
 -- The following six exceptions are renamings of the standard I/O
 -- exceptions, so a handler for the Ada.IO_Exceptions name also catches
 -- them.
399 Status_Error : exception renames Ada.IO_Exceptions.Status_Error;
400 Mode_Error : exception renames Ada.IO_Exceptions.Mode_Error;
401 Name_Error : exception renames Ada.IO_Exceptions.Name_Error;
402 Use_Error : exception renames Ada.IO_Exceptions.Use_Error;
403 Device_Error : exception renames Ada.IO_Exceptions.Device_Error;
404 End_Error : exception renames Ada.IO_Exceptions.End_Error;
405
406 Invalid_Line_Break : exception;
407 -- Set_Recognized_Line_Breaks was called with a string that wasn't one or
408 -- two characters long.
409
410 Invalid_Byte_Sequence : exception;
411 -- The file's content wasn't a valid encoded text for the file's encoding.
 --
 -- NOTE(review): the comments on Skip_Line, Get and Get_Line also name
 -- Incomplete_Byte_Sequence, which is not declared in this specification.
 -- Presumably it is declared in the parent package AdaCL.EAstrings --
 -- confirm.
412
413 ---------------------------------------------------------------------------
414 -----
415private
416 ---------------------------------------------------------------------------
417 -----
418
419 use AdaCL.OS.Low_Level;
420 use Ada.Characters.Latin_1;
421 use AdaCL.EAstrings.Latin_1;
422
423 Native_Line_Breaks : constant array (Known_OS) of EAstring := [
424 Windows => +[CR, LF],
425 MacOS => +[CR, LF],
426 Linux => +[1 => LF]];
427
428 type Line_Break_Record is record
429 String : EAstring;
430 Possible : Boolean;
431 end record;
432 -- Used for line breaks to recognize when reading. Possible tells whether
433 -- this line break can be expressed in the encoding that characters from
434 -- the file are in when they are compared to the recognized line breaks
435 -- (because if it can't, then this line break can't occur in the file).
 --
 -- String holds the text of the line break itself.
436
437 type Line_Break_Array is array (Integer range <>) of Line_Break_Record;
 -- Unconstrained set of line breaks. The index type is Integer, so use
 -- 'Range or 'First/'Last rather than assuming a lower bound of 1.
438
439 type Line_Break_Pointer is access all Line_Break_Array;
 -- Access type used for the Line_Breaks component of EAstream.
440
441 Known_Line_Breaks : constant Line_Break_Array :=
442 [(+[1 => LF], True),
443 (+[1 => CR], True),
444 (+[CR, LF], True),
445 (+[1 => NEL], True)];
 -- Built-in table of four line breaks: LF, CR, CR LF and NEL, all marked
 -- Possible. Presumably the initial set of recognized line breaks --
 -- confirm against the package body.
 -- The aggregate is positional and the array subtype is unconstrained, so
 -- Known_Line_Breaks'First is Integer'First.
446
 -- Full view of EAstream. Deriving from Limited_Controlled makes the type
 -- limited and gives it the Initialize and Finalize hooks overridden
 -- further down.
 -- NOTE(review): Ada.Finalization is not named in this unit's context
 -- clause; presumably a with clause on a parent unit makes it visible --
 -- confirm.
447 type EAstream is abstract new Ada.Finalization.Limited_Controlled with
448 record
449 Encoding : Character_Encoding := Null_String_Encoding;
450 Converter : EAstrings.Converter := Null_Converter;
451 Line_Break : EAstring := Native_Line_Breaks (This_OS);
 -- Line_Breaks has no default expression, so it starts out null.
 -- Presumably Initialize assigns it -- confirm against the body.
452 Line_Breaks : Line_Break_Pointer;
453 Long_Line_Breaks : Boolean := True;
454 Line_Break_Buffer : EAstring := Null_EAstring;
455 Ended : Boolean := False;
456 end record;
457 -- Encoding is how the file is encoded externally (on the disk for
458 -- example). Converter is used when reading files that must be transcoded
459 -- to find the character boundaries.
460 --
461 -- Line_Break is the string to be written for line breaks by Put_Line and
462 -- New_Line.
463 --
464 -- Line_Breaks is the set of strings to be recognized as line breaks by
465 -- Get_Line and Skip_Line.
466 --
467 -- Long_Line_Breaks is kept True when any of the recognized line breaks is
468 -- two characters long (CR LF), and False when they are all one character
469 -- long.
470 --
471 -- Line_Break_Buffer is used when Get_Line has stopped at a one-character
472 -- line break (CR) that might be the beginning of a two-character line
473 -- break (CR LF). (It is also used internally in Get_Line.)
474 --
475 -- Ended is set to True when the end of the file has been found.
476
 -- Overrides of the Limited_Controlled operations for EAstream.
 -- Initialize runs when an EAstream object is created without an explicit
 -- initial value, and Finalize runs when the object ceases to exist.
 -- Presumably they set up and release the Line_Breaks component -- confirm
 -- against the package body.
477 overriding procedure Initialize (Object : in out EAstream);
478 overriding procedure Finalize (Object : in out EAstream);
479
 -- Full view of EAfile: an EAstream plus the Stream_IO file it is layered
 -- on.
480 type EAfile is new EAstream with record
481 Base_File : Stream_IO.File_Type;
482 end record;
483
 -- Full view of Stream_EAstream: an EAstream plus a Stream_Access value.
 -- Stream has no default expression, so it starts out null. Connect takes
 -- a parameter of the same type; presumably it stores it here -- confirm.
484 type Stream_EAstream is new EAstream with record
485 Stream : Stream_IO.Stream_Access;
486 end record;
487
 -- Concrete implementations of the abstract EAstream operations Is_Open
 -- and Stream for the two extensions declared in this package.
488 overriding function Is_Open (File : in EAfile) return Boolean;
489
490 overriding function Is_Open (File : in Stream_EAstream) return Boolean;
491
492 overriding function Stream (File : EAfile) return Stream_IO.Stream_Access;
493
494 overriding function Stream (File : Stream_EAstream) return Stream_IO.Stream_Access;
495
 -- EAfile replaces the inherited Set_Encoding. The reason is not visible
 -- in this specification; see the package body.
496 overriding procedure Set_Encoding
497 (File : in out EAfile;
498 Encoding : in Character_Encoding);
499
500end AdaCL.EAstrings.IO;