256 lines
6.8 KiB
C++
256 lines
6.8 KiB
C++
/*
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
#include <assert.h>
|
|
#include <cmath>
|
|
#include <chrono>
|
|
#include <thread>
|
|
#include "console.h"
|
|
|
|
#ifdef _WIN32
|
|
#include <windows.h>
|
|
|
|
void thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id)
|
|
{
|
|
SetThreadAffinityMask(h, 1 << cpu_id);
|
|
}
|
|
|
|
#else
|
|
#include <pthread.h>
|
|
|
|
/*
 * Pins the thread identified by h to the single logical CPU cpu_id (pthread build).
 *
 * h      - native handle of the thread to pin
 * cpu_id - index of the CPU to enable in the affinity set
 *
 * The return code of pthread_setaffinity_np is ignored.
 */
void thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id)
{
	cpu_set_t cpuMask;

	// Start from an empty set and enable only the requested CPU
	CPU_ZERO(&cpuMask);
	CPU_SET(cpu_id, &cpuMask);

	pthread_setaffinity_np(h, sizeof(cpuMask), &cpuMask);
}
|
|
#endif // _WIN32
|
|
|
|
#include "executor.h"
|
|
#include "minethd.h"
|
|
#include "jconf.h"
|
|
#include "crypto/cryptonight.h"
|
|
|
|
/*
 * Allocates the per-thread sample storage for iThd threads.
 *
 * For every thread there is one array of iBucketSize hash counts, one array of
 * iBucketSize timestamps, and a write index (iBucketTop) that starts at 0.
 */
telemetry::telemetry(size_t iThd)
{
	ppHashCounts = new uint64_t*[iThd];
	ppTimestamps = new uint64_t*[iThd];
	iBucketTop = new uint32_t[iThd];

	for (size_t i = 0; i < iThd; i++)
	{
		// The trailing () value-initialises every element to 0 for *this* thread.
		// calc_telemetry_data() treats a timestamp of 0 as "no sample yet", so
		// each thread's arrays must be cleared, not only those of thread 0.
		ppHashCounts[i] = new uint64_t[iBucketSize]();
		ppTimestamps[i] = new uint64_t[iBucketSize]();
		iBucketTop[i] = 0;
	}
}
|
|
|
|
double telemetry::calc_telemetry_data(size_t iLastMilisec, size_t iThread)
|
|
{
|
|
using namespace std::chrono;
|
|
uint64_t iTimeNow = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
|
|
|
|
uint64_t iEarliestHashCnt = 0;
|
|
uint64_t iEarliestStamp = 0;
|
|
uint64_t iLastestStamp = 0;
|
|
uint64_t iLastestHashCnt = 0;
|
|
bool bHaveFullSet = false;
|
|
|
|
//Start at 1, buckettop points to next empty
|
|
for (size_t i = 1; i < iBucketSize; i++)
|
|
{
|
|
size_t idx = (iBucketTop[iThread] - i) & iBucketMask; //overflow expected here
|
|
|
|
if (ppTimestamps[iThread][idx] == 0)
|
|
break; //That means we don't have the data yet
|
|
|
|
if (iLastestStamp == 0)
|
|
{
|
|
iLastestStamp = ppTimestamps[iThread][idx];
|
|
iLastestHashCnt = ppHashCounts[iThread][idx];
|
|
}
|
|
|
|
if (iTimeNow - ppTimestamps[iThread][idx] > iLastMilisec)
|
|
{
|
|
bHaveFullSet = true;
|
|
break; //We are out of the requested time period
|
|
}
|
|
|
|
iEarliestStamp = ppTimestamps[iThread][idx];
|
|
iEarliestHashCnt = ppHashCounts[iThread][idx];
|
|
}
|
|
|
|
if (!bHaveFullSet || iEarliestStamp == 0 || iLastestStamp == 0)
|
|
return nan("");
|
|
|
|
double fHashes, fTime;
|
|
fHashes = iLastestHashCnt - iEarliestHashCnt;
|
|
fTime = iLastestStamp - iEarliestStamp;
|
|
fTime /= 1000.0;
|
|
|
|
return fHashes / fTime;
|
|
}
|
|
|
|
void telemetry::push_perf_value(size_t iThd, uint64_t iHashCount, uint64_t iTimestamp)
|
|
{
|
|
size_t iTop = iBucketTop[iThd];
|
|
ppHashCounts[iThd][iTop] = iHashCount;
|
|
ppTimestamps[iThd][iTop] = iTimestamp;
|
|
|
|
iBucketTop[iThd] = (iTop + 1) & iBucketMask;
|
|
}
|
|
|
|
/*
 * Sets up the state of one mining thread and launches it.
 *
 * pWork - initial work item, copied into oWork
 * iNo   - thread number, stored truncated to uint8_t
 * ctx   - GPU context this thread operates on (pointer is stored, not copied)
 */
minethd::minethd(miner_work& pWork, size_t iNo, GpuContext* ctx)
{
	// Job state
	oWork = pWork;
	iJobNo = 0;
	bQuit = 0;

	// Identity and device
	iThreadNo = static_cast<uint8_t>(iNo);
	pGpuCtx = ctx;

	// Performance counters
	iHashCount = 0;
	iTimestamp = 0;

	// Launch the worker last, once every field above has been assigned
	oWorkThd = std::thread(&minethd::work_main, this);
}
|
|
|
|
bool minethd::init_gpus()
|
|
{
|
|
size_t i, n = jconf::inst()->GetThreadCount();
|
|
|
|
printer::inst()->print_msg(L1, "Compiling code and initializing GPUs. This will take a while...");
|
|
vGpuData.resize(n);
|
|
|
|
jconf::thd_cfg cfg;
|
|
for(i = 0; i < n; i++)
|
|
{
|
|
jconf::inst()->GetThreadConfig(i, cfg);
|
|
vGpuData[i].deviceIdx = cfg.index;
|
|
vGpuData[i].rawIntensity = cfg.intensity;
|
|
vGpuData[i].workSize = cfg.w_size;
|
|
}
|
|
|
|
return InitOpenCL(vGpuData.data(), n, jconf::inst()->GetPlatformIdx()) == ERR_SUCCESS;
|
|
}
|
|
|
|
std::atomic<uint64_t> minethd::iGlobalJobNo;
|
|
std::atomic<uint64_t> minethd::iConsumeCnt; //Threads get jobs as they are initialized
|
|
minethd::miner_work minethd::oGlobalWork;
|
|
uint64_t minethd::iThreadCount = 0;
|
|
std::vector<GpuContext> minethd::vGpuData;
|
|
|
|
/*
 * Creates one minethd per configured thread, each started with pWork and bound to
 * its own entry of vGpuData, and applies the configured CPU affinity (if any).
 *
 * Resets iGlobalJobNo and iConsumeCnt to 0 first and records the number of
 * threads in iThreadCount.
 *
 * Returns a heap-allocated vector holding the created thread objects.
 */
std::vector<minethd*>* minethd::thread_starter(miner_work& pWork)
{
	iGlobalJobNo = 0;
	iConsumeCnt = 0;

	std::vector<minethd*>* pvThreads = new std::vector<minethd*>;

	const size_t nThreads = jconf::inst()->GetThreadCount();
	pvThreads->reserve(nThreads);

	jconf::thd_cfg thdCfg;
	for (size_t idx = 0; idx < nThreads; ++idx)
	{
		jconf::inst()->GetThreadConfig(idx, thdCfg);
		minethd* pThd = new minethd(pWork, idx, &vGpuData[idx]);

		// A negative cpu_aff means "do not pin"
		const bool bPinned = thdCfg.cpu_aff >= 0;
		if (bPinned)
			thd_setaffinity(pThd->oWorkThd.native_handle(), thdCfg.cpu_aff);

		pvThreads->push_back(pThd);

		if (!bPinned)
			printer::inst()->print_msg(L1, "Starting GPU thread, no affinity.");
		else
			printer::inst()->print_msg(L1, "Starting GPU thread, affinity: %d.", static_cast<int>(thdCfg.cpu_aff));
	}

	iThreadCount = nThreads;
	return pvThreads;
}
|
|
|
|
/*
 * Publishes pWork as the new global job.
 *
 * Blocks (polling every 100 ms) until iConsumeCnt has reached iThreadCount, then
 * stores the work, resets iConsumeCnt and increments iGlobalJobNo.
 */
void minethd::switch_work(miner_work& pWork)
{
	// iConsumeCnt is a basic lock-like polling mechanism just in case we happen to push work
	// faster than threads can consume it. This should never happen in real life.
	// The pool can't physically send jobs faster than every 250ms or so due to net latency.
	for (;;)
	{
		if (iConsumeCnt.load(std::memory_order_seq_cst) >= iThreadCount)
			break;

		std::this_thread::sleep_for(std::chrono::milliseconds(100));
	}

	// Store the job first, then reset the counter, and bump the job number last
	oGlobalWork = pWork;
	iConsumeCnt.store(0, std::memory_order_seq_cst);
	iGlobalJobNo.fetch_add(1);
}
|
|
|
|
void minethd::consume_work()
|
|
{
|
|
memcpy(&oWork, &oGlobalWork, sizeof(miner_work));
|
|
iJobNo++;
|
|
iConsumeCnt++;
|
|
|
|
if(!oWork.bStall)
|
|
{
|
|
pGpuCtx->Nonce = calc_start_nonce(oWork.iResumeCnt);
|
|
XMRSetJob(pGpuCtx, oWork.bWorkBlob, oWork.iWorkSize, oWork.iTarget);
|
|
}
|
|
}
|
|
|
|
void minethd::work_main()
|
|
{
|
|
uint64_t iCount = 0;
|
|
iConsumeCnt++;
|
|
while (bQuit == 0)
|
|
{
|
|
if (oWork.bStall)
|
|
{
|
|
/* We are stalled here because the executor didn't find a job for us yet,
|
|
either because of network latency, or a socket problem. Since we are
|
|
raison d'etre of this software it us sensible to just wait until we have something*/
|
|
|
|
while (iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
|
|
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
|
|
|
consume_work();
|
|
continue;
|
|
}
|
|
|
|
assert(sizeof(job_result::sJobID) == sizeof(pool_job::sJobID));
|
|
|
|
while(iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
|
|
{
|
|
cl_uint results[0x100] = { 0 };
|
|
|
|
XMRRunJob(pGpuCtx, results);
|
|
|
|
for(size_t i = 0; i < results[0xFF]; i++)
|
|
{
|
|
executor::inst()->push_event(ex_event(job_result(oWork.sJobID, oWork.bWorkBlob,
|
|
oWork.iWorkSize, oWork.iTarget, results[i]), oWork.iPoolId));
|
|
}
|
|
|
|
iCount += pGpuCtx->rawIntensity;
|
|
using namespace std::chrono;
|
|
uint64_t iStamp = time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count();
|
|
iHashCount.store(iCount, std::memory_order_relaxed);
|
|
iTimestamp.store(iStamp, std::memory_order_relaxed);
|
|
std::this_thread::yield();
|
|
}
|
|
|
|
consume_work();
|
|
}
|
|
}
|