mirror of https://github.com/OpenRCT2/OpenRCT2.git
342 lines
12 KiB
C++
342 lines
12 KiB
C++
/*****************************************************************************
|
|
* Copyright (c) 2014-2024 OpenRCT2 developers
|
|
*
|
|
* For a complete list of all authors, please refer to contributors.md
|
|
* Interested in contributing? Visit https://github.com/OpenRCT2/OpenRCT2
|
|
*
|
|
* OpenRCT2 is licensed under the GNU General Public License version 3.
|
|
*****************************************************************************/
|
|
|
|
#pragma once
|
|
|
|
#include "../Context.h"
#include "../common.h"
#include "Console.hpp"
#include "DataSerialiser.h"
#include "File.h"
#include "FileScanner.h"
#include "FileStream.h"
#include "JobPool.h"
#include "Numerics.hpp"
#include "Path.hpp"

#include <atomic>
#include <chrono>
#include <cstdint>
#include <exception>
#include <iterator>
#include <list>
#include <mutex>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
|
|
|
|
template<typename TItem> class FileIndex
|
|
{
|
|
private:
|
|
struct DirectoryStats
|
|
{
|
|
uint32_t TotalFiles = 0;
|
|
uint64_t TotalFileSize = 0;
|
|
uint32_t FileDateModifiedChecksum = 0;
|
|
uint32_t PathChecksum = 0;
|
|
};
|
|
|
|
struct ScanResult
|
|
{
|
|
DirectoryStats const Stats;
|
|
std::vector<std::string> const Files;
|
|
|
|
ScanResult(DirectoryStats stats, std::vector<std::string>&& files) noexcept
|
|
: Stats(stats)
|
|
, Files(std::move(files))
|
|
{
|
|
}
|
|
};
|
|
|
|
struct FileIndexHeader
|
|
{
|
|
uint32_t HeaderSize = sizeof(FileIndexHeader);
|
|
uint32_t MagicNumber = 0;
|
|
uint8_t VersionA = 0;
|
|
uint8_t VersionB = 0;
|
|
uint16_t LanguageId = 0;
|
|
DirectoryStats Stats;
|
|
uint32_t NumItems = 0;
|
|
};
|
|
|
|
// Index file format version which when incremented forces a rebuild
|
|
static constexpr uint8_t FILE_INDEX_VERSION = 4;
|
|
|
|
std::string const _name;
|
|
uint32_t const _magicNumber;
|
|
uint8_t const _version;
|
|
std::string const _indexPath;
|
|
std::string const _pattern;
|
|
|
|
public:
|
|
std::vector<std::string> const SearchPaths;
|
|
|
|
public:
|
|
/**
|
|
* Creates a new FileIndex.
|
|
* @param name Name of the index (used for logging).
|
|
* @param magicNumber Magic number for the index (to distinguish between different index files).
|
|
* @param version Version of the specialised index, increment this to force a rebuild.
|
|
* @param indexPath Full path to read and write the index file to.
|
|
* @param pattern The search pattern for indexing files.
|
|
* @param paths A list of search directories.
|
|
*/
|
|
FileIndex(
|
|
std::string&& name, uint32_t magicNumber, uint8_t version, std::string&& indexPath, std::string&& pattern,
|
|
std::vector<std::string>&& paths) noexcept
|
|
: _name(std::move(name))
|
|
, _magicNumber(magicNumber)
|
|
, _version(version)
|
|
, _indexPath(std::move(indexPath))
|
|
, _pattern(std::move(pattern))
|
|
, SearchPaths(std::move(paths))
|
|
{
|
|
}
|
|
|
|
virtual ~FileIndex() = default;
|
|
|
|
/**
|
|
* Queries and directories and loads the index header. If the index is up to date,
|
|
* the items are loaded from the index and returned, otherwise the index is rebuilt.
|
|
*/
|
|
std::vector<TItem> LoadOrBuild(int32_t language) const
|
|
{
|
|
std::vector<TItem> items;
|
|
auto scanResult = Scan();
|
|
auto readIndexResult = ReadIndexFile(language, scanResult.Stats);
|
|
if (std::get<0>(readIndexResult))
|
|
{
|
|
// Index was loaded
|
|
items = std::get<1>(readIndexResult);
|
|
}
|
|
else
|
|
{
|
|
// Index was not loaded
|
|
items = Build(language, scanResult);
|
|
}
|
|
return items;
|
|
}
|
|
|
|
std::vector<TItem> Rebuild(int32_t language) const
|
|
{
|
|
auto scanResult = Scan();
|
|
auto items = Build(language, scanResult);
|
|
return items;
|
|
}
|
|
|
|
protected:
|
|
/**
|
|
* Loads the given file and creates the item representing the data to store in the index.
|
|
*/
|
|
virtual std::optional<TItem> Create(int32_t language, const std::string& path) const abstract;
|
|
|
|
/**
|
|
* Serialises/DeSerialises an index item to/from the given stream.
|
|
*/
|
|
virtual void Serialise(DataSerialiser& ds, const TItem& item) const abstract;
|
|
|
|
private:
|
|
ScanResult Scan() const
|
|
{
|
|
DirectoryStats stats{};
|
|
std::vector<std::string> files;
|
|
for (const auto& directory : SearchPaths)
|
|
{
|
|
auto absoluteDirectory = Path::GetAbsolute(directory);
|
|
LOG_VERBOSE("FileIndex:Scanning for %s in '%s'", _pattern.c_str(), absoluteDirectory.c_str());
|
|
|
|
auto pattern = Path::Combine(absoluteDirectory, _pattern);
|
|
auto scanner = Path::ScanDirectory(pattern, true);
|
|
while (scanner->Next())
|
|
{
|
|
const auto& fileInfo = scanner->GetFileInfo();
|
|
auto path = scanner->GetPath();
|
|
|
|
stats.TotalFiles++;
|
|
stats.TotalFileSize += fileInfo.Size;
|
|
stats.FileDateModifiedChecksum ^= static_cast<uint32_t>(fileInfo.LastModified >> 32)
|
|
^ static_cast<uint32_t>(fileInfo.LastModified & 0xFFFFFFFF);
|
|
stats.FileDateModifiedChecksum = Numerics::ror32(stats.FileDateModifiedChecksum, 5);
|
|
stats.PathChecksum += GetPathChecksum(path);
|
|
|
|
files.push_back(std::move(path));
|
|
}
|
|
}
|
|
return ScanResult(stats, std::move(files));
|
|
}
|
|
|
|
void BuildRange(
|
|
int32_t language, const ScanResult& scanResult, size_t rangeStart, size_t rangeEnd, std::vector<TItem>& items,
|
|
std::atomic<size_t>& processed, std::mutex& printLock) const
|
|
{
|
|
items.reserve(rangeEnd - rangeStart);
|
|
for (size_t i = rangeStart; i < rangeEnd; i++)
|
|
{
|
|
const auto& filePath = scanResult.Files.at(i);
|
|
|
|
if (_log_levels[EnumValue(DiagnosticLevel::Verbose)])
|
|
{
|
|
std::lock_guard<std::mutex> lock(printLock);
|
|
LOG_VERBOSE("FileIndex:Indexing '%s'", filePath.c_str());
|
|
}
|
|
|
|
if (auto item = Create(language, filePath); item.has_value())
|
|
{
|
|
items.push_back(std::move(item.value()));
|
|
}
|
|
|
|
++processed;
|
|
}
|
|
}
|
|
|
|
std::vector<TItem> Build(int32_t language, const ScanResult& scanResult) const
|
|
{
|
|
std::vector<TItem> allItems;
|
|
Console::WriteLine("Building %s (%zu items)", _name.c_str(), scanResult.Files.size());
|
|
|
|
auto startTime = std::chrono::high_resolution_clock::now();
|
|
|
|
const size_t totalCount = scanResult.Files.size();
|
|
if (totalCount > 0)
|
|
{
|
|
JobPool jobPool;
|
|
std::mutex printLock; // For verbose prints.
|
|
|
|
std::list<std::vector<TItem>> containers;
|
|
|
|
size_t stepSize = 100; // Handpicked, seems to work well with 4/8 cores.
|
|
|
|
std::atomic<size_t> processed{ 0 };
|
|
|
|
auto reportProgress = [&]() {
|
|
const size_t completed = processed;
|
|
Console::WriteFormat("File %5zu of %zu, done %3d%%\r", completed, totalCount, completed * 100 / totalCount);
|
|
OpenRCT2::GetContext()->SetProgress(static_cast<uint32_t>(completed), static_cast<uint32_t>(totalCount));
|
|
};
|
|
|
|
for (size_t rangeStart = 0; rangeStart < totalCount; rangeStart += stepSize)
|
|
{
|
|
if (rangeStart + stepSize > totalCount)
|
|
{
|
|
stepSize = totalCount - rangeStart;
|
|
}
|
|
|
|
auto& items = containers.emplace_back();
|
|
|
|
jobPool.AddTask([&, rangeStart, stepSize]() {
|
|
BuildRange(language, scanResult, rangeStart, rangeStart + stepSize, items, processed, printLock);
|
|
});
|
|
|
|
reportProgress();
|
|
}
|
|
|
|
jobPool.Join(reportProgress);
|
|
|
|
for (const auto& itr : containers)
|
|
{
|
|
allItems.insert(allItems.end(), itr.begin(), itr.end());
|
|
}
|
|
}
|
|
|
|
WriteIndexFile(language, scanResult.Stats, allItems);
|
|
|
|
auto endTime = std::chrono::high_resolution_clock::now();
|
|
auto duration = std::chrono::duration<float>(endTime - startTime);
|
|
Console::WriteLine("Finished building %s in %.2f seconds.", _name.c_str(), duration.count());
|
|
|
|
return allItems;
|
|
}
|
|
|
|
std::tuple<bool, std::vector<TItem>> ReadIndexFile(int32_t language, const DirectoryStats& stats) const
|
|
{
|
|
bool loadedItems = false;
|
|
std::vector<TItem> items;
|
|
if (File::Exists(_indexPath))
|
|
{
|
|
try
|
|
{
|
|
LOG_VERBOSE("FileIndex:Loading index: '%s'", _indexPath.c_str());
|
|
auto fs = OpenRCT2::FileStream(_indexPath, OpenRCT2::FILE_MODE_OPEN);
|
|
|
|
// Read header, check if we need to re-scan
|
|
auto header = fs.ReadValue<FileIndexHeader>();
|
|
if (header.HeaderSize == sizeof(FileIndexHeader) && header.MagicNumber == _magicNumber
|
|
&& header.VersionA == FILE_INDEX_VERSION && header.VersionB == _version && header.LanguageId == language
|
|
&& header.Stats.TotalFiles == stats.TotalFiles && header.Stats.TotalFileSize == stats.TotalFileSize
|
|
&& header.Stats.FileDateModifiedChecksum == stats.FileDateModifiedChecksum
|
|
&& header.Stats.PathChecksum == stats.PathChecksum)
|
|
{
|
|
items.reserve(header.NumItems);
|
|
DataSerialiser ds(false, fs);
|
|
// Directory is the same, just read the saved items
|
|
for (uint32_t i = 0; i < header.NumItems; i++)
|
|
{
|
|
TItem item;
|
|
Serialise(ds, item);
|
|
items.emplace_back(std::move(item));
|
|
}
|
|
loadedItems = true;
|
|
}
|
|
else
|
|
{
|
|
Console::WriteLine("%s out of date", _name.c_str());
|
|
}
|
|
}
|
|
catch (const std::exception& e)
|
|
{
|
|
Console::Error::WriteLine("Unable to load index: '%s'.", _indexPath.c_str());
|
|
Console::Error::WriteLine("%s", e.what());
|
|
}
|
|
}
|
|
return std::make_tuple(loadedItems, std::move(items));
|
|
}
|
|
|
|
void WriteIndexFile(int32_t language, const DirectoryStats& stats, const std::vector<TItem>& items) const
|
|
{
|
|
try
|
|
{
|
|
LOG_VERBOSE("FileIndex:Writing index: '%s'", _indexPath.c_str());
|
|
Path::CreateDirectory(Path::GetDirectory(_indexPath));
|
|
auto fs = OpenRCT2::FileStream(_indexPath, OpenRCT2::FILE_MODE_WRITE);
|
|
|
|
// Write header
|
|
FileIndexHeader header;
|
|
header.MagicNumber = _magicNumber;
|
|
header.VersionA = FILE_INDEX_VERSION;
|
|
header.VersionB = _version;
|
|
header.LanguageId = language;
|
|
header.Stats = stats;
|
|
header.NumItems = static_cast<uint32_t>(items.size());
|
|
fs.WriteValue(header);
|
|
|
|
DataSerialiser ds(true, fs);
|
|
// Write items
|
|
for (const auto& item : items)
|
|
{
|
|
Serialise(ds, item);
|
|
}
|
|
}
|
|
catch (const std::exception& e)
|
|
{
|
|
Console::Error::WriteLine("Unable to save index: '%s'.", _indexPath.c_str());
|
|
Console::Error::WriteLine("%s", e.what());
|
|
}
|
|
}
|
|
|
|
static uint32_t GetPathChecksum(const std::string& path)
|
|
{
|
|
uint32_t hash = 0xD8430DED;
|
|
for (const utf8* ch = path.c_str(); *ch != '\0'; ch++)
|
|
{
|
|
hash += (*ch);
|
|
hash += (hash << 10);
|
|
hash ^= (hash >> 6);
|
|
}
|
|
hash += (hash << 3);
|
|
hash ^= (hash >> 11);
|
|
hash += (hash << 15);
|
|
return hash;
|
|
}
|
|
};
|