Skip to content

Commit

Permalink
Adding support for ipfs large files.
Browse files Browse the repository at this point in the history
  • Loading branch information
mijovic committed Mar 11, 2020
1 parent 59071f6 commit 37878cf
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 38 deletions.
3 changes: 1 addition & 2 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ Language Features:


Compiler Features:
* Metadata: Added support for IPFS hashes of large files that need to be split in multiple chunks.


Bugfixes:



### 0.6.4 (2020-03-10)

Language Features:
Expand All @@ -30,7 +30,6 @@ Bugfixes:
* SMTChecker: Fix internal errors when analysing tuples.
* Yul AST Import: correctly import blocks as statements, switch statements and string literals.


### 0.6.3 (2020-02-18)

Language Features:
Expand Down
6 changes: 1 addition & 5 deletions libsolidity/interface/CompilerStack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -899,8 +899,7 @@ h256 const& CompilerStack::Source::swarmHash() const
string const& CompilerStack::Source::ipfsUrl() const
{
if (ipfsUrlCached.empty())
if (scanner->source().size() < 1024 * 256)
ipfsUrlCached = "dweb:/ipfs/" + util::ipfsHashBase58(scanner->source());
ipfsUrlCached = "dweb:/ipfs/" + util::ipfsHashBase58(scanner->source());
return ipfsUrlCached;
}

Expand Down Expand Up @@ -1373,10 +1372,7 @@ bytes CompilerStack::createCBORMetadata(string const& _metadata, bool _experimen
MetadataCBOREncoder encoder;

if (m_metadataHash == MetadataHash::IPFS)
{
solAssert(_metadata.length() < 1024 * 256, "Metadata too large.");
encoder.pushBytes("ipfs", util::ipfsHash(_metadata));
}
else if (m_metadataHash == MetadataHash::Bzzr1)
encoder.pushBytes("bzzr1", util::bzzr1Hash(_metadata).asBytes());
else
Expand Down
155 changes: 133 additions & 22 deletions libsolutil/IpfsHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,21 @@ bytes varintEncoding(size_t _n)
return encoded;
}

bytes encodeByteArray(bytes const& _data)
{
return bytes{0x0a} + varintEncoding(_data.size()) + _data;
}

bytes encodeHash(bytes const& _data)
{
return bytes{0x12, 0x20} + picosha2::hash256(_data);
}

bytes encodeLinkData(bytes const& _data)
{
return bytes{0x12} + varintEncoding(_data.size()) + _data;
}

string base58Encode(bytes const& _data)
{
static string const alphabet{"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"};
Expand All @@ -53,36 +68,132 @@ string base58Encode(bytes const& _data)
reverse(output.begin(), output.end());
return output;
}

struct Chunk
{
Chunk() = default;
Chunk(bytes _hash, size_t _size, size_t _blockSize):
hash(std::move(_hash)),
size(_size),
blockSize(_blockSize)
{}

bytes hash = {};
size_t size = 0;
size_t blockSize = 0;
};

using Chunks = vector<Chunk>;

Chunk combineLinks(Chunks& _links)
{
bytes data = {};
bytes lengths = {};
Chunk chunk = {};
for (Chunk& link: _links)
{
chunk.size += link.size;
chunk.blockSize += link.blockSize;

data += encodeLinkData(
bytes {0x0a} +
varintEncoding(link.hash.size()) +
std::move(link.hash) +
bytes{0x12, 0x00, 0x18} +
varintEncoding(link.blockSize)
);

lengths += bytes{0x20} + varintEncoding(link.size);
}

bytes blockData = data + encodeByteArray(bytes{0x08, 0x02, 0x18} + varintEncoding(chunk.size) + lengths);

chunk.blockSize += blockData.size();
chunk.hash = encodeHash(blockData);

return chunk;
}

Chunks buildNextLevel(Chunks& _currentLevel)
{
size_t const maxChildNum = 174;

Chunks nextLevel;
Chunks links;

for (Chunk& chunk: _currentLevel)
{
links.emplace_back(std::move(chunk.hash), chunk.size, chunk.blockSize);
if (links.size() == maxChildNum)
{
nextLevel.emplace_back(combineLinks(links));
links = {};
}
}
if (!links.empty())
nextLevel.emplace_back(combineLinks(links));

return nextLevel;
}

/// Builds a tree starting from the bottom level where nodes are data nodes.
/// Data nodes should be calculated and passed as the only level in chunk levels
/// Each next level is calculated as following:
/// - Pick up to maxChildNum (174) nodes until a whole level is added, group them and pass to the node in the next level
/// - Do this until the current level has only one node, return the hash in that node
bytes groupChunksBottomUp(Chunks _currentLevel)
{
// when we reach root it will be the only node in that level
while (_currentLevel.size() != 1)
_currentLevel = buildNextLevel(_currentLevel);

// top level's only node stores the hash for file
return _currentLevel.front().hash;
}
}

bytes solidity::util::ipfsHash(string _data)
{
assertThrow(_data.length() < 1024 * 256, DataTooLong, "IPFS hash for large (chunked) files not yet implemented.");
size_t const maxChunkSize = 1024 * 256;
size_t chunkCount = _data.length() / maxChunkSize + (_data.length() % maxChunkSize > 0 ? 1 : 0);
chunkCount = chunkCount == 0 ? 1 : chunkCount;

bytes lengthAsVarint = varintEncoding(_data.size());
Chunks allChunks;

bytes protobufEncodedData;
// Type: File
protobufEncodedData += bytes{0x08, 0x02};
if (!_data.empty())
for (unsigned long chunkIndex = 0; chunkIndex < chunkCount; chunkIndex++)
{
// Data (length delimited bytes)
protobufEncodedData += bytes{0x12};
protobufEncodedData += lengthAsVarint;
protobufEncodedData += asBytes(std::move(_data));
bytes chunkBytes = asBytes(
_data.substr(chunkIndex * maxChunkSize, min(maxChunkSize, _data.length() - chunkIndex * maxChunkSize))
);

bytes lengthAsVarint = varintEncoding(chunkBytes.size());

bytes protobufEncodedData;
// Type: File
protobufEncodedData += bytes{0x08, 0x02};
if (!chunkBytes.empty())
{
// Data (length delimited bytes)
protobufEncodedData += bytes{0x12};
protobufEncodedData += lengthAsVarint;
protobufEncodedData += chunkBytes;
}
// filesize: length as varint
protobufEncodedData += bytes{0x18} + lengthAsVarint;

// PBDag:
// Data: (length delimited bytes)
bytes blockData = encodeByteArray(protobufEncodedData);

// Multihash: sha2-256, 256 bits
allChunks.emplace_back(
encodeHash(blockData),
chunkBytes.size(),
blockData.size()
);
}
// filesize: length as varint
protobufEncodedData += bytes{0x18} + lengthAsVarint;

// PBDag:
// Data: (length delimited bytes)
size_t protobufLength = protobufEncodedData.size();
bytes blockData = bytes{0x0a} + varintEncoding(protobufLength) + std::move(protobufEncodedData);
// TODO Handle "large" files with multiple blocks

// Multihash: sha2-256, 256 bits
bytes hash = bytes{0x12, 0x20} + picosha2::hash256(std::move(blockData));
return hash;

return groupChunksBottomUp(std::move(allChunks));
}

string solidity::util::ipfsHashBase58(string _data)
Expand Down
53 changes: 44 additions & 9 deletions test/libsolutil/IpfsHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,50 @@ BOOST_AUTO_TEST_CASE(test_largest_unchunked)
BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmbNDspMkzkMFKyS3eCJGedG7GWRQHSCzJCZLjxP7wyVAx");
}

// TODO This needs chunking implemented
//BOOST_AUTO_TEST_CASE(test_large)
//{
// size_t length = 1310710;
// string data;
// data.resize(length, 0);
// BOOST_REQUIRE_EQUAL(data.size(), length);
// BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmNg7BJo8gEMDK8yGQbHEwPtycesnE6FUULX5iVd5TAL9f");
//}
BOOST_AUTO_TEST_CASE(test_smallest_chunked)
{
size_t length = 1024 * 256 + 1;
string data;
data.resize(length, 0);
BOOST_REQUIRE_EQUAL(data.size(), length);
BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmbVuw4C4vcmVKqxoWtgDVobvcHrSn51qsmQmyxjk4sB2Q");
}

BOOST_AUTO_TEST_CASE(test_large)
{
size_t length = 1310710;
string data;
data.resize(length, 0);
BOOST_REQUIRE_EQUAL(data.size(), length);
BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmNg7BJo8gEMDK8yGQbHEwPtycesnE6FUULX5iVd5TAL9f");
}

BOOST_AUTO_TEST_CASE(test_largest_one_level)
{
size_t length = 45613056; // 1024 * 256 * 174;
string data;
data.resize(length, 0);
BOOST_REQUIRE_EQUAL(data.size(), length);
BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmY4HSz1oVGdUzb8poVYPLsoqBZjH6LZrtgnme9wWn2Qko");
}

BOOST_AUTO_TEST_CASE(test_smallest_multi_level)
{
size_t length = 45613057; // 1024 * 256 * 174 + 1;
string data;
data.resize(length, 0);
BOOST_REQUIRE_EQUAL(data.size(), length);
BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmehMASWcBsX7VcEQqs6rpR5AHoBfKyBVEgmkJHjpPg8jq");
}

BOOST_AUTO_TEST_CASE(test_multi_level_tree)
{
size_t length = 46661632;
string data;
data.resize(length, 0);
BOOST_REQUIRE_EQUAL(data.size(), length);
BOOST_CHECK_EQUAL(ipfsHashBase58(data), "QmaTb1sT9hrSXJLmf8bxJ9NuwndiHuMLsgNLgkS2eXu3Xj");
}

BOOST_AUTO_TEST_SUITE_END()

Expand Down

0 comments on commit 37878cf

Please sign in to comment.