1 | ----------------------------------------------------------------------------- |
---|---|
2 | -- |
3 | -- Copyright 2004 Björn Persson. |
4 | -- |
5 | -- This library is free software; you can redistribute it and/or modify it |
6 | -- under the terms of the GNU General Public License, version 2, as published |
7 | -- by the Free Software Foundation. |
8 | -- |
9 | -- As a special exception, if other files instantiate generics from this |
10 | -- unit, or you link this unit with other files to produce an executable, |
11 | -- this unit does not by itself cause the resulting executable to be covered |
12 | -- by the General Public License. This exception does not however invalidate |
13 | -- any other reasons why the executable file might be covered by the General |
14 | -- Public License. |
15 | -- |
16 | ---------------------------------------------------------------------------- |
17 | |
18 | pragma License (Modified_Gpl); |
19 | pragma Ada_2022; |
20 | |
21 | with Ada.Streams.Stream_IO; use Ada.Streams; |
22 | with Ada.IO_Exceptions; |
23 | with AdaCL.EAstrings.OS; |
24 | with Ada.Characters.Latin_1; |
25 | with AdaCL.EAstrings.Latin_1; |
26 | with AdaCL.OS.Low_Level; |
27 | |
28 | package AdaCL.EAstrings.IO is |
29 | -- |
30 | -- This package provides encoding-aware output to and input from text |
31 | -- files and text streams. Text streams are byte streams that can be read |
32 | -- or written sequentially, and where the bytes constitute text encoded in |
33 | -- some character encoding. They have no defined line length; line breaks |
34 | -- are represented as characters in the stream. There is no concept of |
35 | -- pages in this package. |
36 | -- |
37 | -- Each open stream has an associated character encoding. By default, |
38 | -- streams are assumed to be encoded in the encoding that OS.OS_Encoding |
39 | -- reports. Another encoding can be specified when the stream is opened or |
40 | -- later. How line breaks are represented when writing and the set of line |
41 | -- breaks that shall be recognized when reading can also be configured. |
42 | -- |
43 | -- In this package it is assumed that Ada.Streams.Stream_Element equals |
44 | -- one byte, that is, eight bits. |
45 | -- |
46 | |
47 | pragma Elaborate_Body; |
48 | |
49 | --------------------------------------------------------------------------- |
50 | -- EAstream |
51 | --------------------------------------------------------------------------- |
52 | |
53 | type EAstream is abstract tagged limited private; |
54 | type EAstream_Pointer is access all EAstream'Class; |
55 | |
56 | type Read_Stop_Cause is (All_Done, End_Of_File, Incomplete, Invalid); |
57 | -- The reason why Get or Get_Line returned, as reported in Exit_Cause. |
58 | -- All_Done : The requested amount of data was read. |
59 | -- End_Of_File : The end of the file was reached before the requested |
60 | -- amount of data was read. |
61 | -- Incomplete : The file ended in an incomplete code sequence. |
62 | -- Invalid : The file's content wasn't validly encoded text for the |
63 | -- file's encoding. |
64 | |
65 | --------------------- |
66 | -- File management -- |
67 | --------------------- |
68 | |
69 | function Is_Open (File : in EAstream) return Boolean is abstract; |
70 | -- Returns True if File is open, that is, if it is associated with a byte |
71 | -- stream; otherwise returns False. Abstract: each stream type defines it. |
72 | |
73 | function End_Of_File (File : in EAstream) return Boolean; |
74 | -- Returns True if the end of the file has already been found, otherwise |
75 | -- returns False. |
76 | -- |
77 | -- End_Of_File cannot look ahead: it returns True only once Get or Get_Line |
78 | -- has actually encountered the end of the file. It is quite possible that |
79 | -- the last character has already been read while End_Of_File still returns |
80 | -- False, because only the next read attempt will fail and report the end |
81 | -- of the file. |
82 | |
83 | function Stream |
84 | (File : in EAstream) |
85 | return Stream_IO.Stream_Access |
86 | is abstract; |
87 | -- Returns access to the raw byte stream underlying File. Status_Error is |
88 | -- propagated if File is not open. Abstract: each stream type defines it. |
89 | |
90 | ---------------------------- |
91 | -- File format management -- |
92 | ---------------------------- |
93 | |
94 | procedure Set_Encoding |
95 | (File : in out EAstream; |
96 | Encoding : in Character_Encoding); |
97 | -- Sets the character encoding used for File: the encoding in which the |
98 | -- byte stream is interpreted when reading, or in which the text is encoded |
99 | -- when writing. The character encoding can be changed in the middle of |
100 | -- the file if so desired. |
101 | |
102 | function Encoding (File : in EAstream) return Character_Encoding; |
103 | -- Returns the character encoding currently used for File: the encoding |
104 | -- in which the byte stream is interpreted when reading, or in which the |
105 | -- text is encoded when writing. |
106 | |
107 | procedure Set_Line_Break |
108 | (File : in out EAstream; |
109 | Line_Break : in EAstring); |
110 | -- Sets the string that Put_Line and New_Line write to File for each line |
111 | -- break. For line breaks on input, see Set_Recognized_Line_Breaks. |
112 | |
113 | function Line_Break (File : in EAstream) return EAstring; |
114 | -- Returns the string that Put_Line and New_Line currently write to File |
115 | -- for each line break. |
116 | |
117 | procedure Set_Recognized_Line_Breaks |
118 | (File : in out EAstream; |
119 | Line_Breaks : in EAstring_Array); |
120 | -- Defines the strings that Get_Line and Skip_Line recognize as line breaks |
121 | -- in File. Each must be one or two characters long (see Invalid_Line_Break). |
122 | |
123 | function Recognized_Line_Breaks |
124 | (File : in EAstream) |
125 | return EAstring_Array; |
126 | -- Returns the strings that Get_Line and Skip_Line currently recognize as |
127 | -- line breaks when reading from File. |
128 | |
129 | ------------------ |
130 | -- Line control -- |
131 | ------------------ |
132 | |
133 | procedure New_Line (File : in EAstream; Spacing : in Positive := 1); |
134 | -- Writes Spacing line breaks (see Set_Line_Break) to File. Mode_Error is |
135 | -- propagated if File is an EAfile whose mode is neither Out_File nor |
136 | -- Append_File. |
137 | |
138 | procedure New_Line (Spacing : in Positive := 1); |
139 | -- Like the New_Line above, but writes to Standard_Output. |
140 | |
141 | procedure Skip_Line |
142 | (File : in out EAstream; |
143 | Spacing : in Positive := 1); |
144 | -- Reads from File and discards all characters until Spacing recognized |
145 | -- line breaks (see Set_Recognized_Line_Breaks) have been read. |
146 | -- |
147 | -- Mode_Error is propagated if File is an EAfile whose mode is not |
148 | -- In_File. End_Error is propagated if an attempt is made to read a file |
149 | -- terminator. Incomplete_Byte_Sequence is propagated if File ends in an |
150 | -- incomplete code sequence. |
151 | -- |
152 | -- Invalid_Byte_Sequence is propagated if File's content isn't validly |
153 | -- encoded text for File's encoding. |
154 | |
155 | procedure Skip_Line (Spacing : in Positive := 1); |
156 | -- Like the Skip_Line above, but reads from Standard_Input. |
157 | |
158 | ----------------------------- |
159 | -- String input and output -- |
160 | ----------------------------- |
161 | |
162 | procedure Get |
163 | (File : in out EAstream; |
164 | Item : out EAstring; |
165 | Length : in Natural; |
166 | Exit_Cause : out Read_Stop_Cause); |
167 | -- Reads Length characters from File and stores them in Item. Line breaks |
168 | -- that are encountered are not treated specially; they are included in |
169 | -- Item just like other characters. |
170 | -- |
171 | -- Mode_Error is propagated if File is an EAfile whose mode is not |
172 | -- In_File. Otherwise, success or failure is reported in Exit_Cause. In |
173 | -- case of failure, as much as could be read is returned in Item. |
174 | |
175 | procedure Get |
176 | (File : in out EAstream; |
177 | Item : out EAstring; |
178 | Length : in Natural); |
179 | -- Like the Get above, but raises an exception when reading fails instead |
180 | -- of reporting it in Exit_Cause: End_Error is raised if an attempt is |
181 | -- made to read a file terminator. Incomplete_Byte_Sequence is raised if |
182 | -- File ends in an incomplete code sequence. |
183 | -- |
184 | -- Invalid_Byte_Sequence is raised if File's content isn't validly encoded |
185 | -- text for File's encoding. |
186 | |
187 | procedure Get (Item : out EAstring; Length : in Natural); |
188 | -- Like the exception-raising Get above, but reads from Standard_Input. |
189 | |
190 | procedure Get_Line |
191 | (File : in out EAstream; |
192 | Item : out EAstring; |
193 | Exit_Cause : out Read_Stop_Cause); |
194 | -- Reads characters from File and stores them in Item until one of the |
195 | -- recognized line breaks (see Set_Recognized_Line_Breaks) is encountered. |
196 | -- In the next call to Get or Get_Line, the first character read will be |
197 | -- the first one after the line break. |
198 | -- |
199 | -- Mode_Error is propagated if File is an EAfile whose mode is not |
200 | -- In_File. Otherwise, success or failure is reported in Exit_Cause. In |
201 | -- case of failure, as much as could be read is returned in Item. |
202 | -- |
203 | -- If the file ends before anything has been read, Exit_Cause is set to |
204 | -- End_Of_File. If the file ends after characters have been read, |
205 | -- Exit_Cause is set to All_Done and End_Of_File is not reported until the |
206 | -- next read attempt. That is, a file is not required to end with a line break. |
207 | |
208 | procedure Get_Line (File : in out EAstream; Item : out EAstring); |
209 | -- Like the Get_Line above, but raises an exception when reading fails: |
210 | -- End_Error is raised if the file ends before anything has been read. |
211 | -- Incomplete_Byte_Sequence is raised if File ends in an incomplete code |
212 | -- sequence. |
213 | -- |
214 | -- Invalid_Byte_Sequence is raised if File's content isn't validly encoded |
215 | -- text for File's encoding. |
216 | |
217 | procedure Get_Line (Item : out EAstring); |
218 | -- Like the exception-raising Get_Line above, but reads from Standard_Input. |
219 | |
220 | procedure Put (File : in EAstream; Item : in EAstring); |
221 | -- Writes the string in Item to File. Mode_Error is propagated if File is |
222 | -- an EAfile whose mode is neither Out_File nor Append_File. |
223 | |
224 | procedure Put (Item : in EAstring); |
225 | -- Like the Put above, but writes to Standard_Output. |
226 | |
227 | procedure Put_Line (File : in EAstream; Item : in EAstring); |
228 | -- Writes the string in Item to File, followed by a line break (see |
229 | -- Set_Line_Break). Mode_Error is propagated if File is an EAfile whose |
230 | -- mode is neither Out_File nor Append_File. |
231 | |
232 | procedure Put_Line (Item : in EAstring); |
233 | -- Like the Put_Line above, but writes to Standard_Output. |
234 | |
235 | --------------------------------------------------------------------- |
236 | -- The default input, output and error files, as EAstream pointers -- |
237 | --------------------------------------------------------------------- |
238 | |
239 | function Standard_Input return EAstream_Pointer; |
240 | function Standard_Output return EAstream_Pointer; |
241 | function Standard_Error return EAstream_Pointer; |
242 | |
243 | --------------------------------------------------------------------------- |
244 | -- EAfile |
245 | --------------------------------------------------------------------------- |
246 | |
247 | type EAfile is new EAstream with private; |
248 | -- An EAfile reads from or writes to a named external file, in one of the Stream_IO modes below. |
249 | |
250 | subtype File_Mode is Stream_IO.File_Mode; |
251 | function In_File return File_Mode renames Stream_IO.In_File; |
252 | function Out_File return File_Mode renames Stream_IO.Out_File; |
253 | function Append_File return File_Mode renames Stream_IO.Append_File; |
254 | |
255 | --------------------- |
256 | -- File management -- |
257 | --------------------- |
258 | |
259 | procedure Create |
260 | (File : in out EAfile; |
261 | Name : in EAstring := Null_EAstring; |
262 | Encoding : in Character_Encoding := OS.OS_Encoding; |
263 | Form : in String := ""); |
264 | -- Creates a new external file with the given name and form, and |
265 | -- associates it with the given EAfile. The EAfile is left open, its |
266 | -- character encoding is set to the given encoding and its current mode is |
267 | -- set to Out_File. |
268 | -- |
269 | -- A null string for Name (the default) specifies an external file that is |
270 | -- not accessible after the completion of the main program (a temporary |
271 | -- file). The valid values of Form are the same as for |
272 | -- Ada.Streams.Stream_IO.Create; a null string specifies the default |
273 | -- options for the external file. |
274 | -- |
275 | -- Status_Error is propagated if the EAfile is already open. Name_Error is |
276 | -- propagated if the string given as Name does not allow the |
277 | -- identification of an external file. Use_Error is propagated if the |
278 | -- external environment does not support creation of an external file with |
279 | -- the given name (in the absence of Name_Error) and form. |
280 | |
281 | procedure Open |
282 | (File : in out EAfile; |
283 | Mode : in File_Mode; |
284 | Name : in EAstring; |
285 | Encoding : in Character_Encoding := OS.OS_Encoding; |
286 | Form : in String := ""); |
287 | -- Associates the given EAfile with the existing external file that has the |
288 | -- given name and form, and sets the EAfile's current mode and character |
289 | -- encoding to the given mode and encoding. The EAfile is left open. |
290 | -- |
291 | -- The valid values of Form are the same as for |
292 | -- Ada.Streams.Stream_IO.Open. |
293 | -- |
294 | -- Status_Error is propagated if the EAfile is already open. Name_Error is |
295 | -- propagated if the string given as Name does not allow the |
296 | -- identification of an external file; in particular, this exception is |
297 | -- propagated if no external file with the given name exists. Use_Error is |
298 | -- propagated if, for the specified mode, the external environment does |
299 | -- not support opening for an external file with the given name (in the |
300 | -- absence of Name_Error) and form. |
301 | |
302 | procedure Close (File : in out EAfile); |
303 | -- Severs the association between the given EAfile and its external file. |
304 | -- The EAfile is left closed. If the file being closed has mode Out_File |
305 | -- or Append_File, then the last character written since the most recent |
306 | -- open or reset is the last character that can be read from the file. |
307 | -- If nothing has been written and the mode is Out_File, then the closed |
308 | -- file is empty. If nothing has been written and the mode is |
309 | -- Append_File, then the closed file is unchanged. |
310 | -- |
311 | -- Status_Error is propagated if the EAfile is not open. |
312 | |
313 | procedure Delete (File : in out EAfile); |
314 | -- Deletes the external file associated with the given EAfile: the EAfile |
315 | -- is closed, and the external file ceases to exist. |
316 | -- |
317 | -- Status_Error is propagated if the EAfile is not open. Use_Error is |
318 | -- propagated if deletion of the external file is not supported by the |
319 | -- external environment. |
320 | |
321 | procedure Reset |
322 | (File : in out EAfile; |
323 | Mode : in File_Mode; |
324 | Encoding : in Character_Encoding); |
325 | -- Resets the given EAfile so that reading can be restarted from the |
326 | -- beginning of the file (for the mode In_File), and so that writing can |
327 | -- be restarted at the beginning of the file (for the mode Out_File) or |
328 | -- after the last character of the file (for the mode Append_File). The |
329 | -- current mode and character encoding of the EAfile are set to the given |
330 | -- mode and encoding. If the EAfile has mode Out_File or Append_File when |
331 | -- Reset is called, the last character written since the most recent open |
332 | -- or reset is the last character that can be read from the file. If |
333 | -- nothing has been written and the mode is Out_File, then the reset file |
334 | -- is empty. If nothing has been written and the mode is Append_File, then |
335 | -- the reset file is unchanged. |
336 | -- |
337 | -- Status_Error is propagated if the EAfile is not open. Use_Error is |
338 | -- propagated if the external environment does not support resetting for |
339 | -- the external file, and also if it does not support resetting to the |
340 | -- specified mode for the external file. |
341 | |
342 | procedure Reset (File : in out EAfile; Mode : in File_Mode); |
343 | -- Like the Reset above, but does not change the character encoding. |
344 | |
345 | procedure Reset (File : in out EAfile); |
346 | -- Like the first Reset, but changes neither the character encoding nor the mode. |
347 | |
348 | function Mode (File : in EAfile) return File_Mode; |
349 | -- Returns the current mode of the given EAfile (In_File, Out_File or |
350 | -- Append_File). Status_Error is propagated if the EAfile is not open. |
351 | |
352 | function Name (File : in EAfile) return EAstring; |
353 | -- Returns a string that uniquely identifies the external file currently |
354 | -- associated with the given EAfile (and that may thus be used in an Open |
355 | -- operation). |
356 | -- |
357 | -- Status_Error is propagated if the EAfile is not open. Use_Error is |
358 | -- propagated if the associated external file is a temporary file that |
359 | -- cannot be opened by any name. |
360 | |
361 | function Form (File : in EAfile) return String; |
362 | -- Returns the form string of the external file currently associated with |
363 | -- the given EAfile. Status_Error is propagated if the EAfile is not open. |
364 | |
365 | -------------------- |
366 | -- Buffer control -- |
367 | -------------------- |
368 | |
369 | procedure Flush (File : in EAfile); |
370 | -- Synchronizes the external file with the internal file (by flushing any |
371 | -- internal buffers) without closing the file or changing the position. |
372 | -- Mode_Error is propagated if the mode of the EAfile is In_File. |
373 | |
374 | --------------------------------------------------------------------------- |
375 | -- Stream_EAstream |
376 | --------------------------------------------------------------------------- |
377 | |
378 | type Stream_EAstream is new EAstream with private; |
379 | -- A Stream_EAstream reads and writes through an object in |
380 | -- Root_Stream_Type'Class, so an EAstream can be wrapped around anything |
381 | -- for which a Stream_Access value can be obtained (see Connect). |
382 | |
383 | --------------------- |
384 | -- File management -- |
385 | --------------------- |
386 | |
387 | procedure Connect |
388 | (File : in out Stream_EAstream; |
389 | Stream : in Stream_IO.Stream_Access; |
390 | Encoding : in Character_Encoding := OS.OS_Encoding); |
391 | -- Connects File to the open byte stream designated by Stream and sets the |
392 | -- character encoding (by default OS.OS_Encoding) to be used when reading |
393 | -- from or writing to the stream. |
394 | |
395 | --------------------------------------------------------------------------- |
396 | -- Exceptions. NOTE(review): Incomplete_Byte_Sequence is not declared in this spec - confirm its source. |
397 | --------------------------------------------------------------------------- |
398 | |
399 | Status_Error : exception renames Ada.IO_Exceptions.Status_Error; |
400 | Mode_Error : exception renames Ada.IO_Exceptions.Mode_Error; |
401 | Name_Error : exception renames Ada.IO_Exceptions.Name_Error; |
402 | Use_Error : exception renames Ada.IO_Exceptions.Use_Error; |
403 | Device_Error : exception renames Ada.IO_Exceptions.Device_Error; |
404 | End_Error : exception renames Ada.IO_Exceptions.End_Error; |
405 | |
406 | Invalid_Line_Break : exception; |
407 | -- Set_Recognized_Line_Breaks was called with a string that wasn't one or |
408 | -- two characters long. |
409 | |
410 | Invalid_Byte_Sequence : exception; |
411 | -- The file's content wasn't validly encoded text for the file's encoding. |
412 | |
413 | --------------------------------------------------------------------------- |
414 | ----- |
415 | private |
416 | --------------------------------------------------------------------------- |
417 | ----- |
418 | |
419 | use AdaCL.OS.Low_Level; |
420 | use Ada.Characters.Latin_1; |
421 | use AdaCL.EAstrings.Latin_1; |
422 | |
423 | Native_Line_Breaks : constant array (Known_OS) of EAstring := [ -- Native line break of each known OS. |
424 | Windows => +[CR, LF], |
425 | MacOS => +[1 => LF], -- macOS is Unix-like and ends lines with LF, not CR LF. |
426 | Linux => +[1 => LF]]; |
427 | |
428 | type Line_Break_Record is record |
429 | String : EAstring; |
430 | Possible : Boolean; |
431 | end record; |
432 | -- A line break to recognize when reading. Possible tells whether it can be |
433 | -- expressed in the encoding that characters from the file are in when they |
434 | -- are compared to the recognized line breaks; if not, it can't occur in the |
435 | -- file. NOTE(review): Line_Break_Array is indexed by Integer - confirm intent. |
436 | |
437 | type Line_Break_Array is array (Integer range <>) of Line_Break_Record; |
438 | |
439 | type Line_Break_Pointer is access all Line_Break_Array; |
440 | |
441 | Known_Line_Breaks : constant Line_Break_Array := -- Every entry starts out marked as possible. |
442 | [(String => +[1 => LF], Possible => True), -- line feed |
443 | (String => +[1 => CR], Possible => True), -- carriage return |
444 | (String => +[CR, LF], Possible => True), -- carriage return + line feed |
445 | (String => +[1 => NEL], Possible => True)]; -- next line |
446 | |
447 | type EAstream is abstract new Ada.Finalization.Limited_Controlled with |
448 | record |
449 | Encoding : Character_Encoding := Null_String_Encoding; |
450 | Converter : EAstrings.Converter := Null_Converter; |
451 | Line_Break : EAstring := Native_Line_Breaks (This_OS); |
452 | Line_Breaks : Line_Break_Pointer; |
453 | Long_Line_Breaks : Boolean := True; |
454 | Line_Break_Buffer : EAstring := Null_EAstring; |
455 | Ended : Boolean := False; |
456 | end record; |
457 | -- Encoding is how the file is encoded externally (on the disk, for |
458 | -- example). Converter is used when reading files that must be transcoded |
459 | -- to find the character boundaries. |
460 | -- |
461 | -- Line_Break is the string that Put_Line and New_Line write for a line |
462 | -- break; it defaults to the native line break of This_OS. |
463 | -- |
464 | -- Line_Breaks points to the strings that Get_Line and Skip_Line recognize |
465 | -- as line breaks, each with its Possible flag. |
466 | -- |
467 | -- Long_Line_Breaks is kept True when any of the recognized line breaks is |
468 | -- two characters long (CR LF), and False when they are all one character |
469 | -- long. |
470 | -- |
471 | -- Line_Break_Buffer is used when Get_Line has stopped at a one-character |
472 | -- line break (CR) that might be the beginning of a two-character line |
473 | -- break (CR LF). (It is also used internally in Get_Line.) |
474 | -- |
475 | -- Ended is set to True when the end of the file has been found. |
476 | |
477 | overriding procedure Initialize (Object : in out EAstream); |
478 | overriding procedure Finalize (Object : in out EAstream); |
479 | |
480 | type EAfile is new EAstream with record |
481 | Base_File : Stream_IO.File_Type; |
482 | end record; |
483 | -- Base_File is presumably the Stream_IO file behind the EAfile (confirm in the body). |
484 | type Stream_EAstream is new EAstream with record |
485 | Stream : Stream_IO.Stream_Access; |
486 | end record; |
487 | -- Stream is presumably the byte stream passed to Connect (confirm in the body). |
488 | overriding function Is_Open (File : in EAfile) return Boolean; |
489 | -- Implements the abstract Is_Open of EAstream for EAfile. |
490 | overriding function Is_Open (File : in Stream_EAstream) return Boolean; |
491 | -- Implements the abstract Is_Open of EAstream for Stream_EAstream. |
492 | overriding function Stream (File : in EAfile) return Stream_IO.Stream_Access; |
493 | -- Implements the abstract Stream of EAstream for EAfile. |
494 | overriding function Stream (File : in Stream_EAstream) return Stream_IO.Stream_Access; |
495 | -- Implements the abstract Stream of EAstream for Stream_EAstream. |
496 | overriding procedure Set_Encoding |
497 | (File : in out EAfile; |
498 | Encoding : in Character_Encoding); |
499 | -- Overrides the Set_Encoding inherited from EAstream, for EAfile only. |
500 | end AdaCL.EAstrings.IO; |