|| /******************************************************************************* * * copyright: Copyright (c) 2007 Daniel Keep. All rights reserved. * * license: BSD style: $(LICENSE) * * version: Initial release: December 2007 * * author: Daniel Keep * ******************************************************************************/ module tango.util.compress.Zip; /* TODO ==== * Disable UTF encoding until I've worked out what version of Zip that's related to... (actually; it's entirely possible that's it's merely a *proposal* at the moment.) (*Done*) * Make ZipEntry safe: make them aware that their creating reader has been destroyed. */ import tango.core.ByteSwap : ByteSwap; import tango.io.device.Array : Array; import tango.io.device.File : File; import Path = tango.io.Path; import tango.io.device.FileMap : FileMap; import tango.io.stream.Zlib : ZlibInput, ZlibOutput; import tango.util.digest.Crc32 : Crc32; import tango.io.model.IConduit : IConduit, InputStream, OutputStream; import tango.io.stream.Digester : DigestInput; import tango.time.Time : Time, TimeSpan; import tango.time.WallClock : WallClock; import tango.time.chrono.Gregorian : Gregorian; import Integer = tango.text.convert.Integer; debug(Zip) import tango.io.Stdout : Stderr; ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// // // Implementation crap // // Why is this here, you ask? Because of bloody DMD forward reference bugs. // For pete's sake, Walter, FIX THEM, please! // // To skip to the actual user-visible stuff, search for "Shared stuff". 
private
{

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// LocalFileHeader
//

    // Fixed-size part of a local file header, without the 4-byte signature.
    // The struct is packed (align(1)) so that it can be read and written as a
    // raw block; LocalFileHeader statically asserts that it is 26 bytes.
    align(1)
    struct LocalFileHeaderData
    {
        align(1):
        // ushort.max is the "not computed yet" default: put_local_header
        // replaces it with a real version number before writing.
        ushort extract_version = ushort.max;
        ushort general_flags = 0;
        ushort compression_method = 0;
        ushort modification_file_time = 0;
        ushort modification_file_date = 0;
        // patch_local_header seeks to this field using the literal offset 10
        // (plus 4 for the signature), so the layout above must not change.
        uint crc_32 = 0; // offsetof = 10
        uint compressed_size = 0;
        uint uncompressed_size = 0;
        ushort file_name_length = 0;
        ushort extra_field_length = 0;

        // Debug helper: prints every field to Stderr (debug(Zip) builds only).
        debug(Zip) void dump()
        {
            Stderr
            ("LocalFileHeader.Data {")("\n")
            (" extract_version = ")(extract_version)("\n")
            (" general_flags = ")(general_flags)("\n")
            (" compression_method = ")(compression_method)("\n")
            (" modification_file_time = ")(modification_file_time)("\n")
            (" modification_file_date = ")(modification_file_date)("\n")
            (" crc_32 = ")(crc_32)("\n")
            (" compressed_size = ")(compressed_size)("\n")
            (" uncompressed_size = ")(uncompressed_size)("\n")
            (" file_name_length = ")(file_name_length)("\n")
            (" extra_field_length = ")(extra_field_length)("\n")
            ("}").newline;
        }
    }

// A local file header: the fixed-size data block plus the variable-length
// file name and extra field that follow it in the archive.
struct LocalFileHeader
{
    // Value that precedes the header in the archive.  put/fill do not handle
    // it themselves; callers write or check it separately.
    const uint signature = 0x04034b50;

    alias LocalFileHeaderData Data;
    Data data;
    static assert( Data.sizeof == 26 );

    char[] file_name;
    ubyte[] extra_field;

    // Returns the fixed-size data block as a raw slice, suitable for
    // readExact.
    void[] data_arr()
    {
        return (&data)[0..1];
    }

    // Writes the header (data block, encoded file name, extra field) to
    // output.  The length fields are filled in on a copy, so this.data is
    // left untouched.
    void put(OutputStream output)
    {
        // Make sure var-length fields will fit.
        if( file_name.length > ushort.max )
            ZipException.fntoolong;

        if( extra_field.length > ushort.max )
            ZipException.eftoolong;

        // Encode filename
        // NOTE: this local shadows the member; the member is this.file_name.
        auto file_name = utf8_to_cp437(this.file_name);
        // Free the encoded copy unless the encoder handed back the original
        // array.
        scope(exit) if( file_name !is cast(ubyte[])this.file_name )
                delete file_name;

        // A null result is treated as an encoding failure.
        if( file_name is null )
            ZipException.fnencode;

        // Update lengths in data
        Data data = this.data;
        data.file_name_length = cast(ushort) file_name.length;
        data.extra_field_length = cast(ushort) extra_field.length;

        // Do it
        version( BigEndian ) swapAll(data);
        writeExact(output, (&data)[0..1]);
        writeExact(output, file_name);
        writeExact(output, extra_field);
    }

    // Reads a header from src: the fixed-size block first, then the file name
    // and extra field whose lengths it announces.  The signature is expected
    // to have been consumed already.
    void fill(InputStream src)
    {
        readExact(src, data_arr());
        version( BigEndian ) swapAll(data);

        //debug(Zip) data.dump;

        auto tmp = new ubyte[data.file_name_length];
        readExact(src, tmp);
        file_name = cp437_to_utf8(tmp);
        // Only free the raw buffer when the decoder produced a new array.
        if( cast(char*) tmp.ptr !is file_name.ptr ) delete tmp;

        extra_field = new ubyte[data.extra_field_length];
        readExact(src, extra_field);
    }

    /*
     * This method will check to make sure that the local and central headers
     * are the same; if they're not, then that indicates that the archive is
     * corrupt.
     *
     * Returns true when the two headers agree, false otherwise.
     */
    bool agrees_with(FileHeader h)
    {
        // NOTE: extra_field used to be compared with h.extra_field, but this caused
        // an assertion in certain archives. I found a mention of these fields being
        // allowed to be different, so I think it in general is wrong to include in
        // this sanity check. larsivi 20081111
        if( data.extract_version != h.data.extract_version
                || data.general_flags != h.data.general_flags
                || data.compression_method != h.data.compression_method
                || data.modification_file_time != h.data.modification_file_time
                || data.modification_file_date != h.data.modification_file_date
                || file_name != h.file_name )
            return false;

        // We need a separate check for the sizes and crc32, since these will
        // be zero if a trailing descriptor was used.
if( !h.usingDataDescriptor() && (
                   data.crc_32 != h.data.crc_32
                || data.compressed_size != h.data.compressed_size
                || data.uncompressed_size != h.data.uncompressed_size ) )
            return false;

        return true;
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// FileHeader
//

    // Fixed-size part of a central directory file header, without the 4-byte
    // signature.  Packed (align(1)); FileHeader statically asserts that it is
    // 42 bytes.
    align(1)
    struct FileHeaderData
    {
        align(1):
        ubyte zip_version;
        ubyte file_attribute_type;
        ushort extract_version;
        ushort general_flags;
        ushort compression_method;
        ushort modification_file_time;
        ushort modification_file_date;
        uint crc_32;
        uint compressed_size;
        uint uncompressed_size;
        ushort file_name_length;
        ushort extra_field_length;
        ushort file_comment_length;
        ushort disk_number_start;
        ushort internal_file_attributes = 0;
        uint external_file_attributes = 0;
        int relative_offset_of_local_header;

        // Debug helper: prints every field to Stderr (debug(Zip) builds only).
        debug(Zip) void dump()
        {
            Stderr
            ("FileHeader.Data {\n")
            (" zip_version = ")(zip_version)("\n")
            (" file_attribute_type = ")(file_attribute_type)("\n")
            (" extract_version = ")(extract_version)("\n")
            (" general_flags = ")(general_flags)("\n")
            (" compression_method = ")(compression_method)("\n")
            (" modification_file_time = ")(modification_file_time)("\n")
            (" modification_file_date = ")(modification_file_date)("\n")
            (" crc_32 = ")(crc_32)("\n")
            (" compressed_size = ")(compressed_size)("\n")
            (" uncompressed_size = ")(uncompressed_size)("\n")
            (" file_name_length = ")(file_name_length)("\n")
            (" extra_field_length = ")(extra_field_length)("\n")
            (" file_comment_length = ")(file_comment_length)("\n")
            (" disk_number_start = ")(disk_number_start)("\n")
            (" internal_file_attributes = ")(internal_file_attributes)("\n")
            (" external_file_attributes = ")(external_file_attributes)("\n")
            (" relative_offset_of_local_header = ")(relative_offset_of_local_header)
            ("\n")
            ("}").newline;
        }

        // Copies the fields that a local file header shares with a central
        // directory header.  Fields that exist only here (zip_version,
        // comment length, attributes, local header offset, ...) are left as
        // they are.
        void fromLocal(LocalFileHeader.Data data)
        {
            extract_version = data.extract_version;
            general_flags = data.general_flags;
            compression_method = data.compression_method;
            modification_file_time = data.modification_file_time;
            modification_file_date = data.modification_file_date;
            crc_32 = data.crc_32;
            compressed_size = data.compressed_size;
            uncompressed_size = data.uncompressed_size;
            file_name_length = data.file_name_length;
            extra_field_length = data.extra_field_length;
        }
    }

// A central directory file header: a pointer to the fixed-size data block
// plus the variable-length name, extra field and comment.
struct FileHeader
{
    // Value that precedes the header in the archive; written and checked by
    // the callers, not by put/map.
    const uint signature = 0x02014b50;

    alias FileHeaderData Data;
    // A pointer rather than a value: map() points it into the buffer it is
    // given, and the writer points it at the data of an Entry.
    Data* data;
    static assert( Data.sizeof == 42 );

    const(char)[] file_name;
    ubyte[] extra_field;
    const(char)[] file_comment;

    // True when bit 3 of the general flags is set, i.e. the sizes and CRC
    // are stored in a trailing data descriptor.
    bool usingDataDescriptor()
    {
        return !!(data.general_flags & 1<<3);
    }

    // Bits 1 and 2 of the general flags.
    uint compressionOptions()
    {
        return (data.general_flags >> 1) & 0b11;
    }

    // Always false: the bit 11 test is commented out, so UTF-8 names are
    // never reported.
    bool usingUtf8()
    {
        //return !!(data.general_flags & 1<<11);
        return false;
    }

    // Returns the fixed-size data block as a raw slice.
    void[] data_arr()
    {
        return (cast(void*)data)[0 .. Data.sizeof];
    }

    // Writes the header (data block, encoded name, extra field, encoded
    // comment) to output.  The length fields are filled in on a copy of
    // *data.
    void put(OutputStream output)
    {
        // Make sure the var-length fields will fit.
        if( file_name.length > ushort.max )
            ZipException.fntoolong;

        if( extra_field.length > ushort.max )
            ZipException.eftoolong;

        if( file_comment.length > ushort.max )
            ZipException.cotoolong;

        // encode the filename and comment
        // NOTE: these locals shadow the members of the same name.
        auto file_name = utf8_to_cp437(this.file_name);
        scope(exit) if( file_name !is cast(ubyte[])this.file_name )
                delete file_name;
        auto file_comment = utf8_to_cp437(this.file_comment);
        scope(exit) if( file_comment !is cast(ubyte[])this.file_comment )
                delete file_comment;

        // A null result is treated as an encoding failure; a null comment is
        // only an error when there was a comment to encode.
        if( file_name is null )
            ZipException.fnencode;

        if( file_comment is null && this.file_comment !is null )
            ZipException.coencode;

        // Update the lengths
        Data data = *(this.data);
        data.file_name_length = cast(ushort) file_name.length;
        data.extra_field_length = cast(ushort) extra_field.length;
        data.file_comment_length = cast(ushort) file_comment.length;

        // Ok; let's do this!
version( BigEndian ) swapAll(data);
        writeExact(output, (&data)[0..1]);
        writeExact(output, file_name);
        writeExact(output, extra_field);
        writeExact(output, file_comment);
    }

    /*
     * Maps this header onto a raw central directory record (without the
     * signature).  data is pointed into src, and the extra field is a slice
     * of src; no copy is made of either.
     *
     * Returns the number of bytes of src that the record occupies.
     */
    long map(void[] src)
    {
        //debug(Zip) Stderr.formatln("FileHeader.map([0..{}])",src.length);
        auto old_ptr = src.ptr;

        data = cast(Data*) src.ptr;
        src = src[Data.sizeof..$];
        version( BigEndian ) swapAll(*data);

        //debug(Zip) data.dump;

        // Names and comments are UTF-8 only when the header says so;
        // otherwise they are CP437.  The branches used to be the other way
        // round, so central directory names were never CP437-decoded even
        // though LocalFileHeader.fill decodes them and both put() methods
        // encode them, which made agrees_with fail for non-ASCII names.
        inout(char[]) function(inout(ubyte[])) conv_fn;
        if( usingUtf8() )
            conv_fn = &utf8_to_utf8;
        else
            conv_fn = &cp437_to_utf8;

        file_name = conv_fn(
                cast(ubyte[]) src[0..data.file_name_length]);
        src = src[data.file_name_length..$];

        extra_field = cast(ubyte[]) src[0..data.extra_field_length];
        src = src[data.extra_field_length..$];

        file_comment = conv_fn(
                cast(ubyte[]) src[0..data.file_comment_length]);
        src = src[data.file_comment_length..$];

        // Return how many bytes we've eaten
        //debug(Zip) Stderr.formatln(" . used {} bytes", cast(long)(src.ptr - old_ptr));
        return cast(long)(src.ptr - old_ptr);
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// EndOfCDRecord
//

    // Fixed-size part of the end of central directory record, without the
    // 4-byte signature.  Packed (align(1)); EndOfCDRecord statically asserts
    // that it is 18 bytes.
    align(1)
    struct EndOfCDRecordData
    {
        align(1):
        ushort disk_number = 0;
        ushort disk_with_start_of_central_directory = 0;
        ushort central_directory_entries_on_this_disk;
        ushort central_directory_entries_total;
        uint size_of_central_directory;
        uint offset_of_start_of_cd_from_starting_disk;
        ushort file_comment_length;

        // Debug helper: prints every field to Stderr (debug(Zip) builds only).
        debug(Zip) void dump()
        {
            Stderr
                .formatln("EndOfCDRecord.Data {}","{")
                .formatln(" disk_number = {}", disk_number)
                .formatln(" disk_with_start_of_central_directory = {}",
                        disk_with_start_of_central_directory)
                .formatln(" central_directory_entries_on_this_disk = {}",
                        central_directory_entries_on_this_disk)
                .formatln(" central_directory_entries_total = {}",
                        central_directory_entries_total)
                .formatln(" size_of_central_directory = {}",
                        size_of_central_directory)
                .formatln(" offset_of_start_of_cd_from_starting_disk = {}",
                        offset_of_start_of_cd_from_starting_disk)
                .formatln(" file_comment_length = {}", file_comment_length)
                .formatln("}");
        }
    }

// The end of central directory record: the fixed-size data block followed by
// the archive comment.
struct EndOfCDRecord
{
    // Value that precedes the record in the archive; written and searched
    // for by the callers.
    const uint signature = 0x06054b50;

    alias EndOfCDRecordData Data;
    Data data;
    static assert( data.sizeof == 18 );

    char[] file_comment;

    // Returns the fixed-size data block as a raw slice.
    void[] data_arr()
    {
        return (cast(void*)&data)[0 .. data.sizeof];
    }

    // Writes the record (data block followed by the encoded comment) to
    // output.  The comment length is filled in on a copy of data.
    void put(OutputStream output)
    {
        // Set up the comment; check length, encode
        if( file_comment.length > ushort.max )
            ZipException.cotoolong;

        // NOTE: this local shadows the member of the same name.
        auto file_comment = utf8_to_cp437(this.file_comment);
        scope(exit) if( file_comment !is cast(ubyte[])this.file_comment )
                delete file_comment;

        // A non-empty comment that comes back null could not be encoded.
        if( file_comment is null && this.file_comment.length != 0 )
            ZipException.coencode;

        // Set up data block
        Data data = this.data;
        data.file_comment_length = cast(ushort) file_comment.length;

        version( BigEndian ) swapAll(data);
        writeExact(output, (&data)[0..1]);

        // The record announces file_comment_length bytes of comment, so they
        // have to follow it.  They used to be left out, which produced a
        // truncated archive whenever the comment was not empty.
        writeExact(output, file_comment);
    }

    // Fills the record from src, which must start right after the signature.
    // The comment is copied out of src, so src may be released afterwards.
    void fill(void[] src)
    {
        //Stderr.formatln("EndOfCDRecord.fill([0..{}])",src.length);

        auto _data = data_arr();
        _data[] = src[0.._data.length];
        src = src[_data.length..$];
        version( BigEndian ) swapAll(data);

        //data.dump;

        file_comment = cast(char[]) src[0..data.file_comment_length].dup;
    }
}

// End of implementation crap
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Shared stuff

public
{
    /**
     * This enumeration denotes the kind of compression used on a file.
     */
    enum Method
    {
        /// No compression should be used.
        Store,
        /// Deflate compression.
        Deflate,
        /**
         * This is a special value used for unsupported or unrecognised
         * compression methods.  This value is only used internally.
*/
        Unsupported
    }
}

private
{
    const ushort ZIP_VERSION = 20;
    const ushort MAX_EXTRACT_VERSION = 20;

    /*
     * General purpose flag bits that are understood (bit 0 is the rightmost
     * digit of the literal):
     *
     *   bits 1-2   compression flags
     *   bit  3     uses trailing descriptor
     *
     * Bit 11 (utf-8 encoding) is clear in the mask, so open_file rejects
     * entries that set it, along with every other unlisted bit.
     */
    const ushort SUPPORTED_FLAGS = 0b00_0_0_0_0000_0_0_0_1_11_0;
    const ushort UNSUPPORTED_FLAGS = ~SUPPORTED_FLAGS;

    // Translates a compression method number from a header into a Method.
    // Anything other than 0 (store) and 8 (deflate) is Unsupported.
    Method toMethod(ushort method)
    {
        if( method == 0 )
            return Method.Store;

        if( method == 8 )
            return Method.Deflate;

        return Method.Unsupported;
    }

    // Translates a Method into the number stored in a header.  Only Store
    // and Deflate can be written; anything else is a programming error.
    ushort fromMethod(Method method)
    {
        if( method == Method.Store )
            return 0;

        if( method == Method.Deflate )
            return 8;

        assert(false, "unsupported compression method");
    }

    /* NOTE: This doesn't actually appear to work.  Using the default magic
     * number with Tango's Crc32 digest works, however.
     */
    //const CRC_MAGIC = 0xdebb20e3u;
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipReader

// Common interface of the archive readers.
interface ZipReader
{
    bool streamed();
    void close();
    bool more();
    ZipEntry get();
    ZipEntry get(ZipEntry);
    int opApply(int delegate(ref ZipEntry));
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipWriter

// Common interface of the archive writers.
interface ZipWriter
{
    void finish();
    void putFile(ZipEntryInfo info, const(char)[] path);
    void putStream(ZipEntryInfo info, InputStream source);
    void putEntry(ZipEntryInfo info, ZipEntry entry);
    void putData(ZipEntryInfo info, const(void)[] data);
    Method method();
    Method method(Method);
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipBlockReader

/**
 * The ZipBlockReader class is used to parse a Zip archive.  It exposes the
 * contents of the archive via an iteration interface.  For instance, to loop
 * over all files in an archive, one can use either
 *
 * -----
 *  foreach( entry ; reader )
 *      ...
* -----
 *
 * Or
 *
 * -----
 *  while( reader.more )
 *  {
 *      auto entry = reader.get;
 *      ...
 *  }
 * -----
 *
 * See the ZipEntry class for more information on the contents of entries.
 *
 * Note that this class can only be used with input sources which can be
 * freely seeked.  Also note that you may open a ZipEntry instance produced by
 * this reader at any time until the ZipReader that created it is closed.
 */
class ZipBlockReader : ZipReader
{
    /**
     * Opens the file at the given path on the local filesystem and reads
     * the archive from it.  The file is owned by the reader and is closed
     * by close().
     */
    this(const(char)[] path)
    {
        file_source = new File(path);
        this(file_source);
    }

    /**
     * Reads the archive from the provided InputStream.  The stream must be
     * attached to a conduit implementing the IConduit.Seek interface.
     */
    this(InputStream source)
    in
    {
        assert( cast(IConduit.Seek) source.conduit,
                "source stream must be seekable" );
    }
    body
    {
        // The same stream is used for reading and for seeking.
        this.seeker = source; //cast(IConduit.Seek) source;
        this.source = source;
    }

    /// A block reader never works in streamed mode.
    bool streamed()
    {
        return false;
    }

    /**
     * Closes the reader, and releases all resources.  After this operation,
     * all ZipEntry instances created by this ZipReader are invalid and should
     * not be used.
     */
    void close()
    {
        // NOTE: Originally more of the GC allocated data in this class were
        // explicitly deleted here, such as cd_data - this caused segfaults
        // and have been removed as they were not necessary from correctness
        // point of view, and the memory usage win is questionable.
        state = State.Done;
        seeker = null;
        source = null;
        delete headers;

        // Only a reader that opened the file itself has one to close.
        if( file_source is null )
            return;

        file_source.close();
        delete file_source;
    }

    /**
     * Returns true if and only if there are additional files in the archive
     * which have not been read via the get method.  This returns true before
     * the first call to get (assuming the opened archive is non-empty), and
     * false after the last file has been accessed.
*/
    bool more()
    {
        // The central directory is loaded on first use.
        if( state == State.Init )
        {
            read_cd();
            assert( state == State.Open );
        }

        if( state == State.Open )
            return (current_index < headers.length);

        if( state == State.Done )
            return false;

        assert(false);
    }

    /**
     * Retrieves the next file from the archive.  Note that although this does
     * perform IO operations, it will not read the contents of the file.
     *
     * The optional reuse argument can be used to instruct the reader to reuse
     * an existing ZipEntry instance.  If passed a null reference, it will
     * create a new ZipEntry instance.
     */
    ZipEntry get()
    {
        if( !more() )
            ZipExhaustedException();

        return new ZipEntry(headers[current_index++], &open_file);
    }

    /// ditto
    ZipEntry get(ZipEntry reuse)
    {
        if( !more() )
            ZipExhaustedException();

        // Recycle the caller's entry when there is one.
        return (reuse !is null)
            ? reuse.reset(headers[current_index++], &open_file)
            : new ZipEntry(headers[current_index++], &open_file);
    }

    /**
     * This is used to iterate over the contents of an archive using a foreach
     * loop.  Please note that the iteration will reuse the ZipEntry instance
     * passed to your loop.  If you wish to keep the instance and re-use it
     * later, you $(B must) use the dup member to create a copy.
     */
    int opApply(int delegate(ref ZipEntry) dg)
    {
        int status = 0;
        ZipEntry current;

        // Stop as soon as the loop body asks to (non-zero result) or the
        // archive runs out of entries.
        while( status == 0 && more() )
        {
            current = get(current);
            status = dg(current);
        }

        // The entry handed to the loop body belongs to the iteration.
        if( current !is null )
            delete current;

        return status;
    }

private:
    InputStream source;
    InputStream seeker; //IConduit.Seek seeker;

    enum State { Init, Open, Done }
    State state;
    size_t current_index = 0;
    FileHeader[] headers;

    // These should be killed when the reader is closed.
    ubyte[] cd_data;
    File file_source = null;

    /*
     * This function will read the contents of the central directory.  Split
     * or spanned archives aren't supported.
*/
    void read_cd()
    in
    {
        assert( state == State.Init );
        assert( headers is null );
        assert( cd_data is null );
    }
    out
    {
        // headers and cd_data are deliberately not required to be non-null:
        // an archive without entries has an empty central directory, and a
        // zero-length "new" yields null, so such a valid archive used to
        // fail this contract.
        assert( state == State.Open );
        assert( current_index == 0 );
    }
    body
    {
        //Stderr.formatln("ZipReader.read_cd()");

        // First, we need to locate the end of cd record, so that we know
        // where the cd itself is, and how big it is.
        auto eocdr = read_eocd_record();

        // Now, make sure the archive is all in one file.
        if( eocdr.data.disk_number !=
                    eocdr.data.disk_with_start_of_central_directory
                || eocdr.data.central_directory_entries_on_this_disk !=
                    eocdr.data.central_directory_entries_total )
            ZipNotSupportedException.spanned();

        // Ok, read the whole damn thing in one go.
        cd_data = new ubyte[eocdr.data.size_of_central_directory];
        long cd_offset = eocdr.data.offset_of_start_of_cd_from_starting_disk;
        seeker.seek(cd_offset, seeker.Anchor.Begin);
        readExact(source, cd_data);

        // Cake.  Now, we need to break it up into records.
        headers = new FileHeader[
            eocdr.data.central_directory_entries_total];

        // Ok, map the CD data into file headers.  Each header keeps pointing
        // into the buffer read above; cd_data itself is advanced past every
        // record as it is consumed.
        foreach( ref header ; headers )
        {
            // A record needs at least its signature and the fixed-size
            // block; less than that means the directory is truncated and
            // cannot hold a valid file header here.
            if( cd_data.length < 4 + FileHeader.Data.sizeof )
                ZipException.badsig("file header");

            // Check signature
            {
                uint sig = (cast(uint[])(cd_data[0..4]))[0];
                version( BigEndian ) swap(sig);
                if( sig != FileHeader.signature )
                    ZipException.badsig("file header");
            }

            auto used = header.map(cd_data[4..$]);
            assert( used <= (size_t.max-4) );

            // Skip the signature and the record just mapped.
            cd_data = cd_data[4+cast(size_t)used..$];
        }

        // Done!
        state = State.Open;
    }

    /*
     * This will locate the end of CD record in the open stream.
     *
     * This code sucks, but that's because Zip sucks.
     *
     * Basically, the EOCD record is stuffed somewhere at the end of the file.
     * In a brilliant move, the record is *variably sized*, which means we
     * have to do a linear backwards search to find it.
*
     * The header itself (including the signature) is at minimum 22 bytes
     * long, plus anywhere between 0 and 2^16-1 bytes of comment.  That means
     * we need to read the last 2^16-1 + 22 bytes from the file, and look for
     * the signature [0x50,0x4b,0x05,0x06] in [0 .. $-18].  In other words,
     * the signature can start no later than 22 bytes before the end.
     *
     * If we find the EOCD record, we'll return its contents.  If we couldn't
     * find it, we'll throw an exception.
     */
    EndOfCDRecord read_eocd_record()
    in
    {
        assert( state == State.Init );
    }
    body
    {
        //Stderr.formatln("read_eocd_record()");

        // Signature + record + max. comment length
        const max_chunk_len = 4 + EndOfCDRecord.Data.sizeof + ushort.max;

        // Signature + record: the least a chunk must hold to contain one.
        const min_record_len = 4 + EndOfCDRecord.Data.sizeof;

        auto file_len = seeker.seek(0, seeker.Anchor.End);
        assert( file_len <= size_t.max );

        // We're going to need min(max_chunk_len, file_len) bytes.
        size_t chunk_len = max_chunk_len;
        if( file_len < max_chunk_len )
            chunk_len = cast(size_t) file_len;
        //Stderr.formatln(" . chunk_len = {}", chunk_len);

        // Seek back and read in the chunk.  Don't forget to clean up after
        // ourselves.
        seeker.seek(-cast(long)chunk_len, seeker.Anchor.End);
        auto chunk_offset = seeker.seek(0, seeker.Anchor.Current);
        //Stderr.formatln(" . chunk_offset = {}", chunk_offset);
        auto chunk = new ubyte[chunk_len];
        scope(exit) delete chunk;
        readExact(source, chunk);

        // Now look for our magic number.  Don't forget that on big-endian
        // machines, we need to byteswap the value we're looking for.
        uint eocd_magic = EndOfCDRecord.signature;
        version( BigEndian ) swap(eocd_magic);

        // size_t.max means "not found".
        size_t eocd_loc = size_t.max;

        if( chunk_len >= min_record_len )
        {
            // Search backwards from the last position at which a complete
            // record still fits.  Two things were wrong here before: the
            // search started 18 bytes from the end, where the 18 bytes of
            // record data after the signature no longer fit; and the
            // condition was "i >= 0", which is always true for an unsigned
            // index, so a missing signature made the loop wrap around and
            // read outside the chunk.  "i-- > 0" visits
            // chunk_len-min_record_len down to 0 and then stops.
            for( size_t i = chunk_len - min_record_len + 1; i-- > 0; )
            {
                if( *(cast(uint*)(chunk.ptr+i)) == eocd_magic )
                {
                    // Found the bugger!  Make sure we skip the signature (forgot
                    // to do that originally; talk about weird errors :P)
                    eocd_loc = i+4;
                    break;
                }
            }
        }

        // If we didn't find it, then we'll assume that this is not a valid
        // archive.
        if( eocd_loc == size_t.max )
            ZipException.missingdir;

        // Ok, so we found it; now what?  Now we need to read the record
        // itself in.
        // eocd_loc is the offset within the chunk where the eocd
        // record was found, so slice it out.
        EndOfCDRecord eocdr;
        eocdr.fill(chunk[eocd_loc..$]);

        // Excellent.  We're done here.
        return eocdr;
    }

    /*
     * Opens the specified file for reading.  If the raw argument passed is
     * true, then the file is *not* decompressed.
     */
    InputStream open_file(FileHeader header, bool raw)
    {
        // Check to make sure that we actually *can* open this file.
        if( header.data.extract_version > MAX_EXTRACT_VERSION )
            ZipNotSupportedException.zipver(header.data.extract_version);

        if( header.data.general_flags & UNSUPPORTED_FLAGS )
            ZipNotSupportedException.flags();

        if( toMethod(header.data.compression_method) == Method.Unsupported )
            ZipNotSupportedException.method(header.data.compression_method);

        // Open a raw stream
        InputStream stream = open_file_raw(header);

        // If that's all they wanted, pass it back.
        if( raw )
            return stream;

        // Next up, wrap in an appropriate decompression stream
        switch( toMethod(header.data.compression_method) )
        {
            case Method.Store:
                // Do nothing: \o/
                break;

            case Method.Deflate:
                // Wrap in a zlib stream.  We want a raw deflate stream,
                // so force no encoding.
                stream = new ZlibInput(stream, ZlibInput.Encoding.None);
                break;

            default:
                assert(false);
        }

        // We done, yo!
        return stream;
    }

    /*
     * Opens a file's raw input stream.  Basically, this returns a slice of
     * the archive's input stream.
*/
    InputStream open_file_raw(FileHeader header)
    {
        // Jump to the local file header that the central directory points
        // to.
        seeker.seek(header.data.relative_offset_of_local_header,
                seeker.Anchor.Begin);

        // It must start with the local file header signature.
        {
            uint magic;
            readExact(source, (&magic)[0..1]);
            version( BigEndian ) swap(magic);
            if( magic != LocalFileHeader.signature )
                ZipException.badsig("local file header");
        }

        // Parse it and make sure it matches the central directory entry.
        LocalFileHeader local;
        local.fill(source);

        if( !local.agrees_with(header) )
            ZipException.incons(header.file_name);

        // The file's data starts right where the local header ended; hand
        // out a slice of the archive stream covering just that data.
        auto data_start = seeker.seek(0, seeker.Anchor.Current);
        return new SliceSeekInputStream(
            source, data_start, header.data.compressed_size);
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipBlockWriter

/**
 * The ZipBlockWriter class is used to create a Zip archive.  It uses a
 * writing iterator interface.
 *
 * Note that this class can only be used with output streams which can be
 * freely seeked.
 */

class ZipBlockWriter : ZipWriter
{
    /**
     * Creates the file at the given path on the local filesystem and writes
     * the archive to it.
     */
    this(const(char)[] path)
    {
        file_output = new File(path, File.WriteCreate);
        this(file_output);
    }

    /**
     * Writes the archive to the provided OutputStream.  The stream must be
     * attached to a conduit implementing the IConduit.Seek interface.
     */
    this(OutputStream output)
    in
    {
        assert( output !is null );
        assert( (cast(IConduit.Seek) output.conduit) !is null );
    }
    body
    {
        // The same stream is used for writing and for seeking.
        this.seeker = output; // cast(IConduit.Seek) output;
        this.output = output;

        // Default to Deflate compression
        method = Method.Deflate;
    }

    /**
     * Finalises the archive, writes out the central directory, and closes the
     * output stream.
     */
    void finish()
    {
        put_cd();
        output.close();

        seeker = null;
        output = null;

        // Only a writer that created the file itself has one to release.
        if( file_output !is null )
            delete file_output;
    }

    /**
     * Adds a file from the local filesystem to the archive.
*/
    void putFile(ZipEntryInfo info, const(char)[] path)
    {
        scope file = new File(path);
        scope(exit) file.close();
        putStream(info, file);
    }

    /**
     * Adds a file using the contents of the given InputStream to the archive.
     */
    void putStream(ZipEntryInfo info, InputStream source)
    {
        put_compressed(info, source);
    }

    /**
     * Transfers a file from another archive into this archive.  Note that
     * this method will not perform any compression: whatever compression was
     * applied to the file originally will be preserved.
     */
    void putEntry(ZipEntryInfo info, ZipEntry entry)
    {
        put_raw(info, entry);
    }

    /**
     * Adds a file using the contents of the given array to the archive.
     */
    void putData(ZipEntryInfo info, const(void)[] data)
    {
        //scope mc = new MemoryConduit(data);
        // The array is duplicated, so the caller's data is never modified.
        scope mc = new Array(data.dup);
        scope(exit) mc.close();
        put_compressed(info, mc);
    }

    /**
     * This property allows you to control what compression method should be
     * used for files being added to the archive.
     */
    @property
    Method method() { return _method; }
    @property
    Method method(Method v) { return _method = v; } /// ditto

private:
    OutputStream output;
    OutputStream seeker;
    File file_output;

    Method _method;

    // What is remembered about each file written so far; put_cd turns these
    // into the central directory.
    struct Entry
    {
        FileHeaderData data;
        long header_position;
        const(char)[] filename;
        const(char)[] comment;
        ubyte[] extra;
    }
    Entry[] entries;

    /*
     * Writes the central directory (one file header per entry) followed by
     * the end of central directory record at the current output position.
     */
    void put_cd()
    {
        // check that there aren't too many CD entries
        if( entries.length > ushort.max )
            ZipException.toomanyentries;

        auto cd_pos = seeker.seek(0, seeker.Anchor.Current);
        if( cd_pos > uint.max )
            ZipException.toolong;

        foreach( entry ; entries )
        {
            FileHeader header;
            header.data = &entry.data;
            header.file_name = entry.filename;
            header.extra_field = entry.extra;
            header.file_comment = entry.comment;

            write(output, FileHeader.signature);
            header.put(output);
        }

        auto cd_len = seeker.seek(0, seeker.Anchor.Current) - cd_pos;

        if( cd_len > uint.max )
            ZipException.cdtoolong;

        {
            // These restate the checks above, so they must accept the same
            // limits.  They used "<", which rejected values the checks had
            // just let through (e.g. exactly ushort.max entries).
            assert( entries.length <= ushort.max );
            assert( cd_len <= uint.max );
            assert( cd_pos <= uint.max );

            EndOfCDRecord eocdr;
eocdr.data.central_directory_entries_on_this_disk =
                cast(ushort) entries.length;
            eocdr.data.central_directory_entries_total =
                cast(ushort) entries.length;
            eocdr.data.size_of_central_directory =
                cast(uint) cd_len;
            eocdr.data.offset_of_start_of_cd_from_starting_disk =
                cast(uint) cd_pos;

            write(output, EndOfCDRecord.signature);
            eocdr.put(output);
        }
    }

    /*
     * Copies an entry of another archive into this one without touching its
     * (possibly compressed) contents.  The header values are taken from the
     * entry, except for the modification time, name and comment, which come
     * from info.
     */
    void put_raw(ZipEntryInfo info, ZipEntry entry)
    {
        // Write out local file header
        LocalFileHeader.Data lhdata;
        auto chdata = entry.header.data;
        lhdata.extract_version = chdata.extract_version;

        // Note: we need to mask off the data descriptor bit because we aren't
        // going to write one.
        lhdata.general_flags = chdata.general_flags & ~(1<<3);
        lhdata.compression_method = chdata.compression_method;
        lhdata.crc_32 = chdata.crc_32;
        lhdata.compressed_size = chdata.compressed_size;
        lhdata.uncompressed_size = chdata.uncompressed_size;

        timeToDos(info.modified, lhdata.modification_file_time,
                                 lhdata.modification_file_date);

        put_local_header(lhdata, info.name);

        // Store comment
        entries[$-1].comment = info.comment;

        // Output file contents
        {
            auto input = entry.open_raw();
            scope(exit) input.close();
            output.copy(input).flush();
        }
    }

    /*
     * Writes a new entry whose contents are read from source and compressed
     * with the current method.  The local header is written first with
     * placeholder CRC and sizes, then patched once the contents have been
     * written and the real values are known.
     */
    void put_compressed(ZipEntryInfo info, InputStream source)
    {
        debug(Zip) Stderr.formatln("ZipBlockWriter.put_compressed()");

        // Write out partial local file header
        auto header_pos = seeker.seek(0, seeker.Anchor.Current);
        debug(Zip) Stderr.formatln(" . header for {} at {}", info.name, header_pos);
        put_local_header(info, _method);

        // Store comment
        entries[$-1].comment = info.comment;

        uint crc;
        uint compressed_size;
        uint uncompressed_size;

        // Output file contents
        //
        // NOTE: the scope guards and scope objects below run in reverse
        // order of declaration when this block is left, so their order
        // matters: the chains are flushed and closed first, and only then
        // are the CRC and the two counts collected.
        {
            // Input/output chains
            InputStream in_chain = source;
            OutputStream out_chain = new WrapSeekOutputStream(output);

            // Count number of bytes coming in from the source file
            scope in_counter = new CounterInput(in_chain);
            in_chain = in_counter;
            scope(success)
            {
                // The 32-bit range check has to look at the final count.  It
                // used to run right after the counter was created, when the
                // count was still zero, and so could never fire.
                assert( in_counter.count() <= typeof(uncompressed_size).max );
                uncompressed_size = cast(uint) in_counter.count();
            }

            // Count the number of bytes going out to the archive
            scope out_counter = new CounterOutput(out_chain);
            out_chain = out_counter;
            scope(success)
            {
                // Same as above: check the final count, not the initial one.
                assert( out_counter.count() <= typeof(compressed_size).max );
                compressed_size = cast(uint) out_counter.count();
            }

            // Add crc
            scope crc_d = new Crc32(/*CRC_MAGIC*/);
            scope crc_s = new DigestInput(in_chain, crc_d);
            in_chain = crc_s;
            scope(success)
            {
                debug(Zip) Stderr.formatln(" . Success: storing CRC.");
                crc = crc_d.crc32Digest();
            }

            // Add compression
            ZlibOutput compress;
            scope(exit) if( compress !is null ) delete compress;

            switch( _method )
            {
                case Method.Store:
                    break;

                case Method.Deflate:
                    compress = new ZlibOutput(out_chain,
                            ZlibOutput.Level.init, ZlibOutput.Encoding.None);
                    out_chain = compress;
                    break;

                default:
                    assert(false);
            }

            // All done.
            scope(exit) in_chain.close();
            scope(success) in_chain.flush();
            scope(exit) out_chain.close();

            out_chain.copy(in_chain).flush();

            debug(Zip) if( compress !is null )
            {
                Stderr.formatln(" . compressed to {} bytes",
                        compress.written);
            }

            debug(Zip) Stderr.formatln(" . wrote {} bytes", out_counter.count);
            debug(Zip) Stderr.formatln(" . contents written");
        }

        debug(Zip) Stderr.formatln(" . CRC for \"{}\": 0x{:x8}", info.name, crc);

        // Rewind, and patch the header
        auto final_pos = seeker.seek(0, seeker.Anchor.Current);
        seeker.seek(header_pos);
        patch_local_header(crc, compressed_size, uncompressed_size);

        // Seek back to the end of the file, and we're done!
        seeker.seek(final_pos);
    }

    /*
     * Patches the local file header starting at the current output location
     * with updated crc and size information.  Also updates the current last
     * Entry.
     */
    void patch_local_header(uint crc_32, uint compressed_size,
            uint uncompressed_size)
    {
        /* BUG: For some reason, this code won't compile.  No idea why... if
         * you instantiate LFHD, it says that there is no "offsetof" property.
         */
        /+
        alias LocalFileHeaderData LFHD;
        static assert( LFHD.compressed_size.offsetof
                == LFHD.crc_32.offsetof + 4 );
        static assert( LFHD.uncompressed_size.offsetof
                == LFHD.compressed_size.offsetof + 4 );
        +/

        // Don't forget we have to seek past the signature, too
        // BUG: .offsetof is broken here
        /+seeker.seek(LFHD.crc_32.offsetof+4, seeker.Anchor.Current);+/
        // 10 is the offset of crc_32 inside LocalFileHeaderData; the two
        // size fields follow it directly, so three writes patch them all.
        seeker.seek(10+4, seeker.Anchor.Current);
        write(output, crc_32);
        write(output, compressed_size);
        write(output, uncompressed_size);

        with( entries[$-1] )
        {
            data.crc_32 = crc_32;
            data.compressed_size = compressed_size;
            data.uncompressed_size = uncompressed_size;
        }
    }

    /*
     * Generates and outputs a local file header from the given info block and
     * compression method.  Note that the crc_32, compressed_size and
     * uncompressed_size header fields will be set to zero, and must be
     * patched.
     */
    void put_local_header(ZipEntryInfo info, Method method)
    {
        LocalFileHeader.Data data;

        data.compression_method = fromMethod(method);
        timeToDos(info.modified, data.modification_file_time,
                                 data.modification_file_date);

        put_local_header(data, info.name);
    }

    /*
     * Writes the given local file header data and filename out to the output
     * stream.  It also appends a new Entry with the data and filename.
*/
    void put_local_header(LocalFileHeaderData data, const(char)[] file_name)
    {
        auto f_name = Path.normalize(file_name);
        auto p = Path.parse(f_name);

        // Work out the "version needed to extract", but only when the caller
        // left the field at its sentinel (maximum) value.
        if( data.extract_version == data.extract_version.max )
        {
            ushort needed;

            // Compression method
            switch( data.compression_method )
            {
                case 0: needed = 10; break;
                case 8: needed = 20; break;
                default:
                    assert(false);
            }

            // A trailing slash marks a directory rather than a real file,
            // which requires at least version 20.
            bool is_dir = f_name.length > 0 && f_name[$-1] == '/';
            if( is_dir && needed < 20 )
                needed = 20;

            data.extract_version = needed;
        }

        /+// Encode filename
        auto file_name_437 = utf8_to_cp437(file_name);
        if( file_name_437 is null )
            ZipException.fnencode;+/

        /+// Set up file name length
        if( file_name_437.length > ushort.max )
            ZipException.fntoolong;
        data.file_name_length = file_name_437.length;+/

        // Absolute paths are stored without their root and the separator
        // that follows it.
        if (p.isAbsolute)
            f_name = f_name[p.root.length+1..$];

        LocalFileHeader header;
        header.data = data;
        header.file_name = f_name;

        // Remember where the header starts, then emit signature and header.
        auto header_pos = seeker.seek(0, seeker.Anchor.Current);

        write(output, LocalFileHeader.signature);
        header.put(output);

        // Record the entry so it can be patched and listed later.
        assert( header_pos <= int.max );
        Entry entry;
        entry.filename = header.file_name;
        entry.header_position = header_pos;
        entry.data.fromLocal(header.data);
        entry.data.relative_offset_of_local_header = cast(int) header_pos;
        entries ~= entry;
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// ZipEntry

/**
 * Represents a single entry in an archive.  It pairs the file's meta-data
 * (see the info field) with the two basic operations on an entry: open and
 * verify.
 */
class ZipEntry
{
    /**
     * Header information on the file.  See the ZipEntryInfo structure for
     * more information.
     */
    ZipEntryInfo info;

    /**
     * Size (in bytes) of the file's uncompressed contents.
*/
    uint size()
    {
        return header.data.uncompressed_size;
    }

    /**
     * Opens a stream for reading the decompressed contents of this entry.
     *
     * Do not assume the returned stream is seekable.  It may be closed
     * without affecting the underlying archive stream.
     *
     * While the entry is still unverified, the returned stream checks the
     * data as it is read: once the stream is exhausted or closed, the
     * checksum is compared.  A corrupt file is therefore only reported
     * after you have finished reading.  Call verify first if you need to
     * know the data is valid before consuming it.
     */
    InputStream open()
    {
        // Already verified: hand out the plain stream.
        if( verified )
            return open_dg(header, false);

        // Otherwise wrap it so the checksum is tested on the way through.
        return new ZipEntryVerifier(this, open_dg(header, false));
    }

    /**
     * Verifies the contents of this file by computing its CRC32 checksum
     * and comparing it with the stored one.  Throws an exception if they
     * differ.
     *
     * Not valid on streamed Zip archives.
     */
    void verify()
    {
        auto stream = open();
        auto scratch = new ubyte[stream.conduit.bufferSize];

        // Drain the stream; reading to the end is what triggers the check
        // when the entry has not been verified yet.
        for( ;; )
        {
            if( stream.read(scratch) == stream.Eof )
                break;
        }
        stream.close();
    }

    /**
     * Creates a new, independent copy of this instance.
     */
    ZipEntry dup()
    {
        return new ZipEntry(header, open_dg);
    }

private:
    /*
     * Callback used to open the file.
     */
    alias InputStream delegate(FileHeader, bool raw) open_dg_t;
    open_dg_t open_dg;

    /*
     * Raw ZIP header.
     */
    FileHeader header;

    /*
     * Tracks whether the file's contents have been verified.
     */
    bool verified = false;

    /*
     * Opens a stream that does not perform any decompression or
     * transformation of the file contents.
This is used internally by
     * ZipWriter to perform fast zip to zip transfers without having to
     * decompress and then recompress the contents.
     *
     * Note that because zip stores CRCs for the *uncompressed* data, this
     * method currently does not do any verification.
     */
    InputStream open_raw()
    {
        return open_dg(header, true);
    }

    /*
     * Builds a ZipEntry from a FileHeader and the callback used to open it.
     */
    this(FileHeader header, open_dg_t open_dg)
    {
        this.reset(header, open_dg);
    }

    /*
     * Re-initialises this instance from a new header and open callback, and
     * marks it as not yet verified.  Returns this instance.
     */
    ZipEntry reset(FileHeader header, open_dg_t open_dg)
    {
        this.header = header;
        this.open_dg = open_dg;

        // Refresh the public meta-data from private copies of the header
        // fields.
        info.name = Path.standard(header.file_name.dup);
        dosToTime(header.data.modification_file_time,
                  header.data.modification_file_date,
                  info.modified);
        info.comment = header.file_comment.dup;

        this.verified = false;

        return this;
    }
}

/**
 * Meta-data describing one file.  The contents of this structure may be
 * safely mutated.
 *
 * The same structure is used to describe a file when adding it to an
 * archive.
 */
struct ZipEntryInfo
{
    /// Full path and file name of this file.
    const(char)[] name;
    /// Modification timestamp.  If this is left uninitialised when passed to
    /// a ZipWriter, it will be reset to the current system time.
    Time modified = Time.min;
    /// Comment on the file.
    const(char)[] comment;
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Exceptions
//

/**
 * This is the base class from which all exceptions generated by this module
 * derive.
*/
class ZipException : Exception
{
    /// Creates the exception with the given message.
    this(immutable(char)[] msg) { super(msg); }

private:
    alias typeof(this) thisT;

    // Calling the class like a function throws a new instance; the named
    // helpers below use this to raise one specific, pre-worded error each.
    static void opCall(immutable(char)[] msg) { throw new ZipException(msg); }

    @property static void badsig()
    {
        thisT("corrupt signature or unexpected section found");
    }

    @property static void badsig(const(char)[] type)
    {
        thisT("corrupt "~type.idup~" signature or unexpected section found");
    }

    @property static void incons(const(char)[] name)
    {
        thisT("inconsistent headers for file \""~name.idup~"\"; "
            "archive is likely corrupted");
    }

    @property static void missingdir()
    {
        thisT("could not locate central archive directory; "
            "file is corrupt or possibly not a Zip archive");
    }

    @property static void toomanyentries()
    {
        thisT("too many archive entries");
    }

    @property static void toolong()
    {
        thisT("archive is too long; limited to 4GB total");
    }

    @property static void cdtoolong()
    {
        thisT("central directory is too long; limited to 4GB total");
    }

    @property static void fntoolong()
    {
        thisT("file name too long; limited to 65,535 characters");
    }

    @property static void eftoolong()
    {
        thisT("extra field too long; limited to 65,535 characters");
    }

    // "co" is the comment field (compare coencode below).  This used to
    // repeat the extra-field message, which misreported the failing field.
    @property static void cotoolong()
    {
        thisT("comment too long; limited to 65,535 characters");
    }

    @property static void fnencode()
    {
        thisT("could not encode filename into codepage 437");
    }

    @property static void coencode()
    {
        thisT("could not encode comment into codepage 437");
    }

    @property static void tooold()
    {
        thisT("cannot represent dates before January 1, 1980");
    }
}

/**
 * This exception is thrown if a ZipReader detects that a file's contents do
 * not match the stored checksum.
 */
class ZipChecksumException : ZipException
{
    /// Creates the exception for the entry with the given name.
    this(const(char)[] name)
    {
        super("checksum failed on zip entry \""~name.idup~"\"");
    }

private:
    // Calling the class like a function throws a new instance.
    static void opCall(const(char)[] name) { throw new ZipChecksumException(name); }
}

/**
 * This exception is thrown if you call the reader's get method when there
 * are no more files in the archive.
*/
class ZipExhaustedException : ZipException
{
    /// Creates the exception with its fixed message.
    this() { super("no more entries in archive"); }

private:
    // Calling the class like a function throws a new instance.
    static void opCall() { throw new ZipExhaustedException; }
}

/**
 * Thrown when an archive relies on features that the reader does not
 * support.
 */
class ZipNotSupportedException : ZipException
{
    /// Creates the exception with the given message.
    this(immutable(char)[] msg) { super(msg); }

private:
    alias ZipNotSupportedException thisT;

    // Throws with " not supported" appended to the given feature name.
    static void opCall(const(char)[] msg)
    {
        throw new thisT(msg.idup ~ " not supported");
    }

    static void spanned()
    {
        thisT("split and multi-disk archives");
    }

    // Reports an archive that needs a newer format version than this module
    // handles.
    static void zipver(ushort ver)
    {
        // Renders a version stored as major*10 + minor as "major.minor".
        immutable(char)[] dotted(long v)
        {
            return Integer.toString(v / 10).idup
                ~"."
                ~Integer.toString(v % 10).idup;
        }

        throw new thisT("zip format version "
                ~dotted(ver)
                ~" not supported; maximum of version "
                ~dotted(MAX_EXTRACT_VERSION)
                ~" supported.");
    }

    static void flags()
    {
        throw new thisT("unknown or unsupported file flags enabled");
    }

    // Reports an unsupported compression method, naming it where the method
    // number is a known one.
    static void method(ushort m)
    {
        immutable(char)[] label;

        switch( m )
        {
            // Methods 0 and 8 are supported, so must never be reported here.
            case 0:
            case 8: assert(false); // supported

            case 1: label = "Shrink"; break;
            case 2: label = "Reduce (factor 1)"; break;
            case 3: label = "Reduce (factor 2)"; break;
            case 4: label = "Reduce (factor 3)"; break;
            case 5: label = "Reduce (factor 4)"; break;
            case 6: label = "Implode"; break;

            case 9: label = "Deflate64"; break;
            case 10: label = "TERSE (old)"; break;

            case 12: label = "Bzip2"; break;
            case 14: label = "LZMA"; break;

            case 18: label = "TERSE (new)"; break;
            case 19: label = "LZ77"; break;

            case 97: label = "WavPack"; break;
            case 98: label = "PPMd"; break;

            default: label = "unknown";
        }

        thisT(label ~ " compression method");
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Convenience methods

void createArchive(const(char)[] archive, Method method, const(char[])[] files...)
{
    // Adds every named file to a new archive, using the file's own path as
    // its entry name and its on-disk modification time as its timestamp.
    scope zw = new ZipBlockWriter(archive);
    zw.method = method;

    foreach( file ; files )
    {
        ZipEntryInfo zi;
        zi.name = file;
        zi.modified = Path.modified(file);

        zw.putFile(zi, file);
    }

    zw.finish();
}

/**
 * Extracts every file entry of the given archive below the directory dest,
 * creating parent directories as needed and restoring each file's
 * modification time.  Directory entries are skipped.
 *
 * Params:
 *  archive = path of the Zip archive to read.
 *  dest    = directory the entries are extracted under.
 */
void extractArchive(const(char)[] archive, const(char)[] dest)
{
    scope zr = new ZipBlockReader(archive);

    foreach( entry ; zr )
    {
        auto name = entry.info.name;

        // An entry with an empty name cannot be mapped to a file; skipping
        // it also keeps the [$-1] test below from indexing an empty array.
        if( name.length == 0 ) continue;

        // Skip directories
        if( name[$-1] == '/' ||
            name[$-1] == '\\') continue;

        // NOTE(review): entry names come from the archive and are joined to
        // dest without checking for ".." components; confirm whether names
        // that escape dest need to be rejected here.
        auto path = Path.join(dest, name);
        path = Path.normalize(path);

        // Create the parent directory if necessary.
        auto parent = Path.parse(path).parent;
        if( !Path.exists(parent) )
        {
            Path.createPath(parent);
        }

        path = Path.native(path);

        // Write out the file
        scope fout = new File(path, File.WriteCreate);
        fout.copy(entry.open());
        fout.close();

        // Update timestamps: keep the access time, set the modification
        // time recorded in the archive.
        auto oldTS = Path.timeStamps(path);
        Path.timeStamps(path, oldTS.accessed, entry.info.modified);
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Private implementation stuff
//

private:

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Verification stuff

/*
 * This class wraps an input stream, and computes the CRC as it passes
 * through.  On the event of either a close or EOF, it checks the CRC against
 * the one in the provided ZipEntry.  If they don't match, it throws an
 * exception.
*/
class ZipEntryVerifier : InputStream
{
    /*
     * Wraps source so that everything read through this object is also fed
     * into a fresh CRC32 digest, to be compared against entry's stored CRC.
     */
    this(ZipEntry entry, InputStream source)
    in
    {
        assert( entry !is null );
        assert( source !is null );
    }
    body
    {
        this.entry = entry;
        this.digest = new Crc32;
        this.source = new DigestInput(source, digest);
    }

    IConduit conduit()
    {
        return source.conduit;
    }

    InputStream input()
    {
        return source;
    }

    long seek (long ofs, Anchor anchor = Anchor.Begin)
    {
        return source.seek (ofs, anchor);
    }

    /*
     * Checks the CRC (unless that already happened at EOF) and closes the
     * wrapped stream.  Throws ZipChecksumException on a mismatch.
     *
     * The wrapped stream is closed and the references dropped even when the
     * check throws; previously a failed check left the stream open.  Calling
     * close again afterwards is a no-op instead of a null dereference.
     */
    void close()
    {
        // Already closed: nothing left to verify or release.
        if( this.source is null )
            return;

        scope(exit)
        {
            this.source.close();
            this.entry = null;
            this.digest = null;
            this.source = null;
        }

        check();
    }

    /*
     * Reads from the wrapped stream; when it reports Eof, the accumulated
     * CRC is checked before Eof is passed on to the caller.
     */
    size_t read(void[] dst)
    {
        auto bytes = source.read(dst);
        if( bytes == IConduit.Eof )
            check();
        return bytes;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        this.source.flush();
        return this;
    }

private:
    Crc32 digest;
    InputStream source;
    ZipEntry entry;

    /*
     * Compares the digest with the CRC stored in the entry's header.  The
     * digest is deleted once consulted, and a missing digest makes this a
     * no-op, so the comparison runs at most once.
     */
    void check()
    {
        if( digest is null ) return;

        auto crc = digest.crc32Digest();
        delete digest;

        if( crc != entry.header.data.crc_32 )
            ZipChecksumException(entry.info.name);

        else
            entry.verified = true;
    }
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// IO functions

/*
 * Really, seriously, read some bytes without having to go through a sodding
 * buffer.
 *
 * Keeps reading until dst has been filled completely; throws an Exception if
 * the stream ends first.
 */
void readExact(InputStream s, void[] dst)
{
    //Stderr.formatln("readExact(s, [0..{}])", dst.length);
    while( dst.length > 0 )
    {
        auto octets = s.read(dst);
        //Stderr.formatln(" . octets = {}", octets);
        if( octets == -1 ) // Beware the dangers of MAGICAL THINKING
            throw new Exception("unexpected end of stream");
        // Advance past what was read and go round again for the rest.
        dst = dst[octets..$];
    }
}

/*
 * Really, seriously, write some bytes.
*/
void writeExact(OutputStream s, const(void)[] src)
{
    // Keep writing until all of src has been accepted; throw if the stream
    // reports its end first.
    while( src.length > 0 )
    {
        auto octets = s.write(src);
        if( octets == -1 )
            throw new Exception("unexpected end of stream");
        src = src[octets..$];
    }
}

/*
 * Writes the raw bytes of a single value to the stream, byte-swapping it
 * first on big-endian builds.
 */
void write(T)(OutputStream s, T value)
{
    version( BigEndian ) swap(value);
    writeExact(s, (&value)[0..1]);
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Endian garbage

/*
 * Byte-swaps the fields of data in place.  If T declares record_fields, only
 * that many leading fields are swapped; otherwise all of them are.
 */
void swapAll(T)(ref T data)
{
    static if( is(typeof(T.record_fields)) )
        const fields = T.record_fields;
    else
        const fields = data.tupleof.length;

    foreach( i,_ ; data.tupleof )
    {
        if( i == fields ) break;
        swap(data.tupleof[i]);
    }
}

/*
 * Byte-swaps a single value in place.  Supports sizes of 1 (no-op), 2, 4, 8
 * and 10 bytes; any other size is a compile-time error.
 */
void swap(T)(ref T data)
{
    static if( T.sizeof == 1 )
        {}
    else static if( T.sizeof == 2 )
        ByteSwap.swap16(&data, 2);
    else static if( T.sizeof == 4 )
        ByteSwap.swap32(&data, 4);
    else static if( T.sizeof == 8 )
        ByteSwap.swap64(&data, 8);
    else static if( T.sizeof == 10 )
        ByteSwap.swap80(&data, 10);
    else
        static assert(false, "Can't swap "~T.stringof~"s.");
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// IBM Code Page 437 stuff
//

// UTF-8 replacements for CP 437 bytes 0..31, indexed by the byte value.
const char[][] cp437_to_utf8_map_low = [
    "\u0000"[], "\u263a", "\u263b", "\u2665", "\u2666", "\u2663", "\u2660", "\u2022",
    "\u25d8", "\u25cb", "\u25d9", "\u2642", "\u2640", "\u266a", "\u266b", "\u263c",

    "\u25b6", "\u25c0", "\u2195", "\u203c", "\u00b6", "\u00a7", "\u25ac", "\u21a8",
    "\u2191", "\u2193", "\u2192", "\u2190", "\u221f", "\u2194", "\u25b2", "\u25bc"
];

// UTF-8 replacements for CP 437 bytes 128..255, indexed by (byte - 128).
const char[][] cp437_to_utf8_map_high = [
    "\u00c7"[], "\u00fc", "\u00e9", "\u00e2", "\u00e4", "\u00e0", "\u00e5", "\u00e7",
    "\u00ea", "\u00eb", "\u00e8", "\u00ef", "\u00ee", "\u00ec", "\u00c4", "\u00c5",

    "\u00c9", "\u00e6", "\u00c6", "\u00f4", "\u00f6", "\u00f2", "\u00fb", "\u00f9",
    "\u00ff", "\u00d6", "\u00dc", "\u00f8", "\u00a3", "\u00a5", "\u20a7", "\u0192",

    "\u00e1", "\u00ed", "\u00f3", "\u00fa", "\u00f1", "\u00d1", "\u00aa", "\u00ba",
    "\u00bf", "\u2310", "\u00ac", "\u00bd", "\u00bc", "\u00a1", "\u00ab", "\u00bb",

    "\u2591", "\u2592", "\u2593", "\u2502", "\u2524", "\u2561", "\u2562", "\u2556",
    "\u2555", "\u2563", "\u2551", "\u2557", "\u255d", "\u255c", "\u255b", "\u2510",

    "\u2514", "\u2534", "\u252c", "\u251c", "\u2500", "\u253c", "\u255e", "\u255f",
    "\u255a", "\u2554", "\u2569", "\u2566", "\u2560", "\u2550", "\u256c", "\u2567",

    "\u2568", "\u2564", "\u2565", "\u2559", "\u2558", "\u2552", "\u2553", "\u256b",
    "\u256a", "\u2518", "\u250c", "\u2588", "\u2584", "\u258c", "\u2590", "\u2580",

    "\u03b1", "\u00df", "\u0393", "\u03c0", "\u03a3", "\u03c3", "\u00b5", "\u03c4",
    "\u03a6", "\u0398", "\u03a9", "\u03b4", "\u221e", "\u03c6", "\u03b5", "\u2229",

    "\u2261", "\u00b1", "\u2265", "\u2264", "\u2320", "\u2321", "\u00f7", "\u2248",
    "\u00b0", "\u2219", "\u00b7", "\u221a", "\u207f", "\u00b2", "\u25a0", "\u00a0"
];

/*
 * Converts a CP 437 encoded byte string to UTF-8.
 *
 * Bytes 32..126 and 0 are copied through unchanged; every other byte is
 * replaced by the UTF-8 sequence from the tables above (127 becomes U+2302).
 * If the input holds only pass-through bytes it is returned as is, without
 * copying; otherwise a newly allocated string is returned.
 */
inout(char[]) cp437_to_utf8(inout(ubyte[]) s)
{
    foreach( i,c ; s )
    {
        if( (1 <= c && c <= 31) || c >= 127 )
        {
            /* Damn; we got a character not in ASCII.  Since this is the first
             * non-ASCII character we found, copy everything up to this point
             * into the output verbatim.  Every replacement is a code point
             * below U+10000, i.e. at most three UTF-8 bytes, so reserving
             * three bytes per remaining input byte guarantees that no further
             * allocation is needed.  (Reserving only two, as this code used
             * to, overflows the buffer on inputs dominated by three-byte
             * replacements such as "\x01\x01\x01\x01".)
             */
            auto r = new char[i+3*(s.length-i)];
            r[0..i] = cast(char[]) s[0..i];
            size_t k=i; // current length

            // New characters are appended at r[k].

            foreach( d ; s[i..$] )
            {
                if( 32 <= d && d <= 126 || d == 0 )
                {
                    r[k++] = d;
                }
                else if( 1 <= d && d <= 31 )
                {
                    const(char)[] repl = cp437_to_utf8_map_low[d];
                    r[k..k+repl.length] = repl[];
                    k += repl.length;
                }
                else if( d == 127 )
                {
                    const(char)[] repl = "\u2302";
                    r[k..k+repl.length] = repl[];
                    k += repl.length;
                }
                else if( d > 127 )
                {
                    const(char)[] repl = cp437_to_utf8_map_high[d-128];
                    r[k..k+repl.length] = repl[];
                    k += repl.length;
                }
                else
                    assert(false);
            }

            // Trim the buffer down to what was actually produced.
            return cast(typeof(return))r[0..k];
        }
    }

    /* If we got here, then all the characters in s are also in ASCII, which
     * means it's also valid UTF-8; return the string unmodified.
     */
    return cast(typeof(return)) s;
}

debug( UnitTest )
{
    unittest
    {
        const(char)[] c(const(char)[] s)
        {
            return cp437_to_utf8(cast(const(ubyte)[]) s);
        }

        auto s = c("Hi there \x01 old \x0c!");
        assert( s == "Hi there \u263a old \u2640!", "\""~s~"\"" );
        s = c("Marker \x7f and divide \xf6.");
        assert( s == "Marker \u2302 and divide \u00f7.", "\""~s~"\"" );

        // Every byte here expands to three UTF-8 bytes; this must not
        // overrun the output buffer.
        s = c("\x01\x02\x03\x04");
        assert( s == "\u263a\u263b\u2665\u2666", "\""~s~"\"" );
    }
}

// Reverse mapping, from a Unicode code point to its CP 437 byte.  It is
// filled in by the module constructor below.
__gshared const char[dchar] utf8_to_cp437_map;

shared static this()
{
    utf8_to_cp437_map = [
        '\u0000': '\x00', '\u263a': '\x01', '\u263b': '\x02', '\u2665': '\x03',
        '\u2666': '\x04', '\u2663': '\x05', '\u2660': '\x06', '\u2022': '\x07',
        '\u25d8': '\x08', '\u25cb': '\x09', '\u25d9': '\x0a', '\u2642': '\x0b',
        '\u2640': '\x0c', '\u266a': '\x0d', '\u266b': '\x0e', '\u263c': '\x0f',

        '\u25b6': '\x10', '\u25c0': '\x11', '\u2195': '\x12', '\u203c': '\x13',
        '\u00b6': '\x14', '\u00a7': '\x15', '\u25ac': '\x16', '\u21a8': '\x17',
        '\u2191': '\x18', '\u2193': '\x19', '\u2192': '\x1a', '\u2190': '\x1b',
        '\u221f': '\x1c', '\u2194': '\x1d', '\u25b2': '\x1e', '\u25bc': '\x1f',

        /*
         * Printable ASCII range (well, most of it) is handled specially.
*/ '\u00c7': '\x80', '\u00fc': '\x81', '\u00e9': '\x82', '\u00e2': '\x83', '\u00e4': '\x84', '\u00e0': '\x85', '\u00e5': '\x86', '\u00e7': '\x87', '\u00ea': '\x88', '\u00eb': '\x89', '\u00e8': '\x8a', '\u00ef': '\x8b', '\u00ee': '\x8c', '\u00ec': '\x8d', '\u00c4': '\x8e', '\u00c5': '\x8f', '\u00c9': '\x90', '\u00e6': '\x91', '\u00c6': '\x92', '\u00f4': '\x93', '\u00f6': '\x94', '\u00f2': '\x95', '\u00fb': '\x96', '\u00f9': '\x97', '\u00ff': '\x98', '\u00d6': '\x99', '\u00dc': '\x9a', '\u00f8': '\x9b', '\u00a3': '\x9c', '\u00a5': '\x9d', '\u20a7': '\x9e', '\u0192': '\x9f', '\u00e1': '\xa0', '\u00ed': '\xa1', '\u00f3': '\xa2', '\u00fa': '\xa3', '\u00f1': '\xa4', '\u00d1': '\xa5', '\u00aa': '\xa6', '\u00ba': '\xa7', '\u00bf': '\xa8', '\u2310': '\xa9', '\u00ac': '\xaa', '\u00bd': '\xab', '\u00bc': '\xac', '\u00a1': '\xad', '\u00ab': '\xae', '\u00bb': '\xaf', '\u2591': '\xb0', '\u2592': '\xb1', '\u2593': '\xb2', '\u2502': '\xb3', '\u2524': '\xb4', '\u2561': '\xb5', '\u2562': '\xb6', '\u2556': '\xb7', '\u2555': '\xb8', '\u2563': '\xb9', '\u2551': '\xba', '\u2557': '\xbb', '\u255d': '\xbc', '\u255c': '\xbd', '\u255b': '\xbe', '\u2510': '\xbf', '\u2514': '\xc0', '\u2534': '\xc1', '\u252c': '\xc2', '\u251c': '\xc3', '\u2500': '\xc4', '\u253c': '\xc5', '\u255e': '\xc6', '\u255f': '\xc7', '\u255a': '\xc8', '\u2554': '\xc9', '\u2569': '\xca', '\u2566': '\xcb', '\u2560': '\xcc', '\u2550': '\xcd', '\u256c': '\xce', '\u2567': '\xcf', '\u2568': '\xd0', '\u2564': '\xd1', '\u2565': '\xd2', '\u2559': '\xd3', '\u2558': '\xd4', '\u2552': '\xd5', '\u2553': '\xd6', '\u256b': '\xd7', '\u256a': '\xd8', '\u2518': '\xd9', '\u250c': '\xda', '\u2588': '\xdb', '\u2584': '\xdc', '\u258c': '\xdd', '\u2590': '\xde', '\u2580': '\xdf', '\u03b1': '\xe0', '\u00df': '\xe1', '\u0393': '\xe2', '\u03c0': '\xe3', '\u03a3': '\xe4', '\u03c3': '\xe5', '\u00b5': '\xe6', '\u03c4': '\xe7', '\u03a6': '\xe8', '\u0398': '\xe9', '\u03a9': '\xea', '\u03b4': '\xeb', '\u221e': '\xec', '\u03c6': '\xed', '\u03b5': 
'\xee', '\u2229': '\xef',

        '\u2261': '\xf0', '\u00b1': '\xf1', '\u2265': '\xf2', '\u2264': '\xf3',
        '\u2320': '\xf4', '\u2321': '\xf5', '\u00f7': '\xf6', '\u2248': '\xf7',
        '\u00b0': '\xf8', '\u2219': '\xf9', '\u00b7': '\xfa', '\u221a': '\xfb',
        '\u207f': '\xfc', '\u00b2': '\xfd', '\u25a0': '\xfe', '\u00a0': '\xff'
    ];
}

/*
 * Converts a UTF-8 string to CP 437.
 *
 * Code points 32..126 and 0 map to the same byte value, U+2302 maps to 0x7f,
 * and everything else is looked up in utf8_to_cp437_map.  If the input holds
 * only code points 32..126 and 0, it is returned as is, without copying;
 * otherwise a newly allocated array is returned.
 *
 * Throws an Exception if a code point has no CP 437 equivalent.
 */
inout(ubyte)[] utf8_to_cp437(inout(char)[] s)
{
    alias typeof(return) ret_type; /* Some sort of strange bug here */

    // The conversion lives in a nested function taking const(char)[]; its
    // name suggests this works around compiler issue 6867.  It returns null
    // when no conversion was needed.
    ubyte[] bug_6867(const(char)[] cs)
    {
        // i is the offset in cs at which the decoded code point c starts.
        foreach( i,dchar c ; cs )
        {
            if( !((32 <= c && c <= 126) || c == 0) )
            {
                /* We got a character that cannot be passed through as is: we
                 * need to create a buffer to hold the new string.  Since
                 * UTF-8 is *always* larger than CP 437, we need, at most, an
                 * array of the same number of elements.
                 */
                auto r = new ubyte[cs.length];
                r[0..i] = cast(ubyte[]) cs[0..i];
                size_t k=i;

                // Re-encode everything from the first such character onwards.
                foreach( dchar d ; cs[i..$] )
                {
                    if( 32 <= d && d <= 126 || d == 0 )
                        r[k++] = cast(ubyte)d;

                    else if( d == '\u2302' )
                        r[k++] = '\x7f';

                    else if( auto e_ptr = d in utf8_to_cp437_map )
                        r[k++] = *e_ptr;

                    else
                    {
                        // The message reports the code point's numeric value.
                        throw new Exception("cannot encode character \""
                                ~ Integer.toString(cast(uint)d).idup
                                ~ "\" in codepage 437.");
                    }
                }

                return r[0..k];
            }
        }

        return null;
    }

    auto ret = bug_6867(s);
    if (ret !is null)
        return cast(ret_type)ret;

    // If we got here, then the entire string is printable ASCII, which just
    // happens to *also* be valid CP 437!  Huzzah!
    return cast(typeof(return)) s;
}

debug( UnitTest )
{
    // Round trip: all 256 byte values are converted from CP 437 to UTF-8 and
    // back, and must come out unchanged.
    unittest
    {
        alias cp437_to_utf8 x;
        alias utf8_to_cp437 y;

        ubyte[256] s;
        foreach( i,ref c ; s )
            c = cast(ubyte)i;

        auto a = x(s);
        auto b = y(a);
        if(!( b == s ))
        {
            // Display list of characters that failed to convert as expected,
            // and what value we got.
            auto hex = "0123456789abcdef";
            auto msg = "".dup;
            foreach( i,ch ; b )
            {
                if( ch != i )
                {
                    msg ~= hex[i>>4];
                    msg ~= hex[i&15];
                    msg ~= " (";
                    msg ~= hex[ch>>4];
                    msg ~= hex[ch&15];
                    msg ~= "), ";
                }
            }
            msg ~= "failed.";

            assert( false, msg );
        }
    }
}

/*
 * This is here to simplify the code elsewhere.
*/
inout(char[]) utf8_to_utf8(inout(ubyte[]) s)
{
    // Pure reinterpretation of the bytes as characters; nothing is copied.
    return cast(typeof(return)) s;
}

ubyte[] utf8_to_utf8(char[] s)
{
    // The reverse reinterpretation: characters to bytes.
    return cast(ubyte[]) s;
}

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Date/time stuff

/*
 * Unpacks a DOS time/date pair into a Time.
 *
 * dostime holds seconds/2 in bits 0-4, minutes in bits 5-10 and hours in
 * bits 11-15; dosdate holds the day in bits 0-4, the month in bits 5-8 and
 * the year, counted from 1980, in bits 9-15.
 */
void dosToTime(ushort dostime, ushort dosdate, out Time time)
{
    uint sec  = (dostime & 0x1f) * 2;
    uint min  = (dostime >> 5) & 0x3f;
    uint hour = (dostime >> 11) & 0x1f;

    uint day  = dosdate & 0x1f;
    uint mon  = (dosdate >> 5) & 0x0f;
    uint year = ((dosdate >> 9) & 0x7f) + 1980;

    time = Gregorian.generic.toTime(year, mon, day, hour, min, sec);
}

/*
 * Packs a Time into a DOS time/date pair, using the bit layout described for
 * dosToTime.  Time.min stands for "now" and is replaced by the current wall
 * clock time.  Throws a ZipException for dates before 1980.
 */
void timeToDos(Time time, out ushort dostime, out ushort dosdate)
{
    // Time.min is the "not set" marker
    auto when = (time == Time.min) ? WallClock.now : time;

    auto date = Gregorian.generic.toDate(when);
    if( date.year < 1980 )
        ZipException.tooold;

    auto tod = when.time();

    dostime = cast(ushort) (
        (tod.hours   << 11)
      | (tod.minutes << 5)
      | (tod.seconds / 2));

    dosdate = cast(ushort) (
        ((date.year - 1980) << 9)
      | (date.month << 5)
      | (date.day));
}

// ************************************************************************** //
// ************************************************************************** //
// ************************************************************************** //

// Dependencies
private:

import tango.io.device.Conduit : Conduit;

/*******************************************************************************

    copyright:  Copyright © 2007 Daniel Keep.  All rights reserved.
license:    BSD style: $(LICENSE)

    version:    Prerelease

    author:     Daniel Keep

*******************************************************************************/

//module tangox.io.stream.CounterStream;

//import tango.io.device.Conduit : Conduit;
//import tango.io.model.IConduit : IConduit, InputStream, OutputStream;

/**
 * The counter stream classes keep track of how many bytes flow through a
 * stream.
 *
 * To use one, wrap it around an existing stream.  The number of bytes that
 * have passed through the wrapped stream is available from the count member.
 */
class CounterInput : InputStream
{
    /// Wraps the given input stream, which must not be null.
    this(InputStream input)
    in
    {
        assert( input !is null );
    }
    body
    {
        this.source = input;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    InputStream input()
    {
        return source;
    }

    long seek (long ofs, Anchor anchor = Anchor.Begin)
    {
        return source.seek (ofs, anchor);
    }

    /// Closes the wrapped stream and drops the reference to it.
    override void close()
    {
        source.close();
        source = null;
    }

    /// Reads from the wrapped stream, adding the bytes obtained to the count.
    override size_t read(void[] dst)
    {
        auto got = source.read(dst);

        // Eof is a marker, not a byte count.
        if( got == IConduit.Eof )
            return got;

        _count += got;
        return got;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        source.flush();
        return this;
    }

    /// Number of bytes read through this stream so far.
    long count() { return _count; }

private:
    InputStream source;
    long _count;
}

/// ditto
class CounterOutput : OutputStream
{
    /// Wraps the given output stream, which must not be null.
    this(OutputStream output)
    in
    {
        assert( output !is null );
    }
    body
    {
        this.sink = output;
    }

    override IConduit conduit()
    {
        return sink.conduit;
    }

    OutputStream output()
    {
        return sink;
    }

    long seek (long ofs, Anchor anchor = Anchor.Begin)
    {
        return sink.seek (ofs, anchor);
    }

    /// Closes the wrapped stream and drops the reference to it.
    override void close()
    {
        sink.close();
        sink = null;
    }

    /// Writes to the wrapped stream, adding the bytes accepted to the count.
    override size_t write(const(void)[] dst)
    {
        auto put = sink.write(dst);

        // Eof is a marker, not a byte count.
        if( put == IConduit.Eof )
            return put;

        _count += put;
        return put;
    }

    override OutputStream copy(InputStream src, size_t max=-1)
    {
        Conduit.transfer(src, this, max);
        return this;
    }

    override OutputStream flush()
    {
        sink.flush();
        return this;
    }

    /// Number of bytes written through this stream so far.
    long count() { return _count; }

private:
    // The wrapped stream and the running total of bytes written through it.
    OutputStream sink;
    long _count;
}

/*******************************************************************************

    copyright:  Copyright © 2007 Daniel Keep.  All rights reserved.

    license:    BSD style: $(LICENSE)

    version:    Prerelease

    author:     Daniel Keep

*******************************************************************************/

//module tangox.io.stream.SliceStream;

//import tango.io.device.Conduit : Conduit;
//import tango.io.model.IConduit : IConduit, InputStream, OutputStream;

/**
 * This stream can be used to provide stream-based access to a subset of
 * another stream.  It is akin to slicing an array.
 *
 * This stream fully supports seeking, and as such requires that the
 * underlying stream also support seeking.
 */
class SliceSeekInputStream : InputStream
{
    //alias IConduit.Seek.Anchor Anchor;

    /**
     * Create a new slice stream from the given source, covering the content
     * starting at position begin, for length bytes.
     *
     * The source must not be null and its conduit must support seeking;
     * begin and length must not be negative.
     */
    this(InputStream source, long begin, long length)
    in
    {
        assert( source !is null );
        assert( (cast(IConduit.Seek) source.conduit) !is null );
        assert( begin >= 0 );
        assert( length >= 0 );
    }
    body
    {
        // source and seeker refer to the same object; the class invariant
        // relies on that.
        this.source = source;
        this.seeker = source; //cast(IConduit.Seek) source;
        this.begin = begin;
        this.length = length;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    // Only drops the references; the underlying stream is not closed.
    override void close()
    {
        source = null;
        seeker = null;
    }

    /*
     * Reads from the slice at its own current position.  Returns Eof once
     * the end of the slice has been reached, and never reads beyond it.
     */
    override size_t read(void[] dst)
    {
        // If we're at the end of the slice, return eof
        if( _position >= length )
            return IConduit.Eof;

        // Otherwise, make sure we don't try to read past the end of the slice
        if( _position+dst.length > length )
            dst.length = cast(size_t) (length-_position);

        // Seek source stream to the appropriate location.  The source is
        // only moved if it is not already where this slice expects it.
        if( seeker.seek(0, Anchor.Current) != begin+_position )
            seeker.seek(begin+_position, Anchor.Begin);

        // Do the read
        auto read = source.read(dst);
        if( read == IConduit.Eof )
            // If we got an Eof, we'll consider that a bug for the moment.
// TODO: proper exception
            throw new Exception("unexpected end-of-stream");

        _position += read;
        return read;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        source.flush();
        return this;
    }

    InputStream input()
    {
        return source;
    }

    /*
     * Moves the position within the slice and returns the new position.
     * Offsets are relative to the start of the slice (Begin), the current
     * position (Current) or the end of the slice (End).
     *
     * A position that would end up negative is clamped to zero for every
     * anchor.  Anchor.Begin used to store a negative offset unchanged, which
     * violated the class invariant and, with invariants compiled out, let
     * the next read start before the beginning of the slice.
     */
    override long seek(long offset, Anchor anchor = cast(Anchor)0)
    {
        long target;

        switch( anchor )
        {
            case Anchor.Begin:
                target = offset;
                break;

            case Anchor.Current:
                target = _position + offset;
                break;

            case Anchor.End:
                target = length + offset;
                break;

            default:
                assert(false);
        }

        _position = target < 0 ? 0 : target;
        return _position;
    }

private:
    InputStream source;
    InputStream seeker;

    long _position, begin, length;

    invariant()
    {
        assert( cast(Object) source is cast(Object) seeker );
        assert( begin >= 0 );
        assert( length >= 0 );
        assert( _position >= 0 );
    }
}

/**
 * This stream can be used to provide stream-based access to a subset of
 * another stream.  It is akin to slicing an array.
 */
class SliceInputStream : InputStream
{
    /**
     * Create a new slice stream from the given source, covering the content
     * starting at the current seek position for length bytes.
     */
    this(InputStream source, long length)
    in
    {
        assert( source !is null );
        assert( length >= 0 );
    }
    body
    {
        this.source = source;
        this._length = length;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    /*
     * Detaches from the source without closing it.
     *
     * The remaining length is zeroed as well: the class invariant demands a
     * live source whenever bytes remain, so closing a partially read slice
     * used to trip it.  After close, read simply reports Eof.
     */
    override void close()
    {
        source = null;
        _length = 0;
    }

    InputStream input()
    {
        return source;
    }

    long seek (long ofs, Anchor anchor = Anchor.Begin)
    {
        return source.seek (ofs, anchor);
    }

    /*
     * Reads from the source, never consuming more than the bytes remaining
     * in the slice.  Returns Eof once the slice is used up.
     */
    override size_t read(void[] dst)
    {
        // If we're at the end of the slice, return eof
        if( _length <= 0 )
            return IConduit.Eof;

        // Otherwise, make sure we don't try to read past the end of the slice
        if( dst.length > _length )
            dst.length = cast(size_t) _length;

        // Do the read
        auto read = source.read(dst);
        if( read == IConduit.Eof )
            // If we got an Eof, we'll consider that a bug for the moment.
// TODO: proper exception
            throw new Exception("unexpected end-of-stream");

        // Account for what was consumed from the slice.
        _length -= read;

        return read;
    }

    override void[] load(size_t max=-1)
    {
        return Conduit.load(this, max);
    }

    override InputStream flush()
    {
        source.flush();
        return this;
    }

private:
    InputStream source;
    long _length; // bytes of the slice not yet read

    invariant()
    {
        // A source is only required while there is still data to read.
        if( _length > 0 ) assert( source !is null );
    }
}

/**
 * This stream can be used to provide stream-based access to a subset of
 * another stream.  It is akin to slicing an array.
 *
 * This stream fully supports seeking, and as such requires that the
 * underlying stream also support seeking.
 */
class SliceSeekOutputStream : OutputStream
{
    //alias IConduit.Seek.Anchor Anchor;

    /**
     * Create a new slice stream from the given source, covering the content
     * starting at position begin, for length bytes.
     *
     * The source must not be null and its conduit must support seeking;
     * begin and length must not be negative.
     */
    this(OutputStream source, long begin, long length)
    in
    {
        // Checked first so that a null source fails this assertion rather
        // than crashing on source.conduit below; the input slice classes in
        // this module carry the same check.
        assert( source !is null );
        assert( (cast(IConduit.Seek) source.conduit) !is null );
        assert( begin >= 0 );
        assert( length >= 0 );
    }
    body
    {
        // source and seeker refer to the same object; the class invariant
        // relies on that.
        this.source = source;
        this.seeker = source; //cast(IConduit.Seek) source;
        this.begin = begin;
        this.length = length;
    }

    override IConduit conduit()
    {
        return source.conduit;
    }

    // Only drops the references; the underlying stream is not closed.
    override void close()
    {
        source = null;
        seeker = null;
    }

    /*
     * Writes into the slice at its own current position.  Returns Eof once
     * the end of the slice has been reached, and never writes beyond it.
     */
    size_t write(const(void)[] src)
    {
        // If we're at the end of the slice, return eof
        if( _position >= length )
            return IConduit.Eof;

        // Otherwise, make sure we don't try to write past the end of the
        // slice
        if( _position+src.length > length )
            src.length = cast(size_t) (length-_position);

        // Seek source stream to the appropriate location.
        if( seeker.seek(0, Anchor.Current) != begin+_position )
            seeker.seek(begin+_position, Anchor.Begin);

        // Do the write
        auto wrote = source.write(src);
        if( wrote == IConduit.Eof )
            // If we got an Eof, we'll consider that a bug for the moment.
// TODO: proper exception throw new Exception("unexpected end-of-stream"); _position += wrote; return wrote; } override OutputStream copy(InputStream src, size_t max=-1) { Conduit.transfer(src, this, max); return this; } override OutputStream flush() { source.flush(); return this; } override OutputStream output() { return source; } override long seek(long offset, Anchor anchor = cast(Anchor)0) { switch( anchor ) { case Anchor.Begin: _position = offset; break; case Anchor.Current: _position += offset; if( _position < 0 ) _position = 0; break; case Anchor.End: _position = length+offset; if( _position < 0 ) _position = 0; break; default: assert(false); } return _position; } private: OutputStream source; OutputStream seeker; long _position, begin, length; invariant() { assert( cast(Object) source is cast(Object) seeker ); assert( begin >= 0 ); assert( length >= 0 ); assert( _position >= 0 ); } } /******************************************************************************* copyright: Copyright © 2007 Daniel Keep. All rights reserved. license: BSD style: $(LICENSE) version: Prerelease author: Daniel Keep *******************************************************************************/ //module tangox.io.stream.WrapStream; //import tango.io.device.Conduit : Conduit; //import tango.io.model.IConduit : IConduit, InputStream, OutputStream; /** * This stream can be used to provide access to another stream. * Its distinguishing feature is that users cannot close the underlying * stream. * * This stream fully supports seeking, and as such requires that the * underlying stream also support seeking. */ class WrapSeekInputStream : InputStream { //alias IConduit.Seek.Anchor Anchor; /** * Create a new wrap stream from the given source. 
*/ this(InputStream source) in { assert( source !is null ); assert( (cast(IConduit.Seek) source.conduit) !is null ); } body { this.source = source; this.seeker = source; //cast(IConduit.Seek) source; this._position = seeker.seek(0, Anchor.Current); } /// ditto this(InputStream source, long position) in { assert( position >= 0 ); } body { this(source); this._position = position; } override IConduit conduit() { return source.conduit; } override void close() { source = null; seeker = null; } override size_t read(void[] dst) { if( seeker.seek(0, Anchor.Current) != _position ) seeker.seek(_position, Anchor.Begin); auto read = source.read(dst); if( read != IConduit.Eof ) _position += read; return read; } override void[] load(size_t max=-1) { return Conduit.load(this, max); } override InputStream flush() { source.flush(); return this; } InputStream input() { return source; } override long seek(long offset, Anchor anchor = cast(Anchor)0) { seeker.seek(_position, Anchor.Begin); return (_position = seeker.seek(offset, anchor)); } private: InputStream source; InputStream seeker; long _position; invariant() { assert( cast(Object) source is cast(Object) seeker ); assert( _position >= 0 ); } } /** * This stream can be used to provide access to another stream. * Its distinguishing feature is that the users cannot close the underlying * stream. * * This stream fully supports seeking, and as such requires that the * underlying stream also support seeking. */ class WrapSeekOutputStream : OutputStream { //alias IConduit.Seek.Anchor Anchor; /** * Create a new wrap stream from the given source. 
*/ this(OutputStream source) in { assert( (cast(IConduit.Seek) source.conduit) !is null ); } body { this.source = source; this.seeker = source; //cast(IConduit.Seek) source; this._position = seeker.seek(0, Anchor.Current); } /// ditto this(OutputStream source, long position) in { assert( position >= 0 ); } body { this(source); this._position = position; } override IConduit conduit() { return source.conduit; } override void close() { source = null; seeker = null; } size_t write(const(void)[] src) { if( seeker.seek(0, Anchor.Current) != _position ) seeker.seek(_position, Anchor.Begin); auto wrote = source.write(src); if( wrote != IConduit.Eof ) _position += wrote; return wrote; } override OutputStream copy(InputStream src, size_t max=-1) { Conduit.transfer(src, this, max); return this; } override OutputStream flush() { source.flush(); return this; } override OutputStream output() { return source; } override long seek(long offset, Anchor anchor = cast(Anchor)0) { seeker.seek(_position, Anchor.Begin); return (_position = seeker.seek(offset, anchor)); } private: OutputStream source; OutputStream seeker; long _position; invariant() { assert( cast(Object) source is cast(Object) seeker ); assert( _position >= 0 ); } } |