diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 000000000..2349ab411
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,16 @@
+version: 2
+jobs:
+ build:
+ machine: true
+ working_directory: /home/circleci/project
+
+ steps:
+ - checkout
+
+ - run:
+ name: Build the docker image
+ command: docker build -t xmr-stak:$CIRCLE_BRANCH /home/circleci/project
+
+ - run:
+ name: Run a benchmark with Monero V8
+ command: docker run --rm -t xmr-stak:$CIRCLE_BRANCH /usr/local/bin/xmr-benchmark.sh
\ No newline at end of file
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..25ba43d61
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,14 @@
+IndentWidth: 4
+TabWidth: 4
+ColumnLimit: 0
+BreakBeforeBraces: Allman
+AllowShortIfStatementsOnASingleLine: false
+IndentCaseLabels: false
+SpaceBeforeParens: Never
+UseTab: Always
+AlignAfterOpenBracket: DontAlign
+PointerBindsToType: true
+BreakConstructorInitializers: AfterColon
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
deleted file mode 100644
index 8451f3289..000000000
--- a/.github/ISSUE_TEMPLATE.md
+++ /dev/null
@@ -1,30 +0,0 @@
-Please provide as much as possible information to reproduce the issue.
-
-# Basic information
- - Type of the CPU.
- - Type of the GPU (if you try to miner with the GPU).
-
-# Compile issues
- - Which OS do you use?
- ```
- add **all** commands you used and the **full** compile output here
- ```
- ```
- run `cmake -LA .` in the build folder and add the output here
- ```
-
-# Issue with the execution
- - Do you compiled the miner by our own?
- ```
- run `./xmr-stak --version-long` and add the output here
- ```
-
-# AMD OpenCl issue
-
- ```
- run `clinfo` and add the output here
- ```
-
-# Stability issue
- - Is the CPU or GPU overclocked?
- - Is the Main memory of the CPU or GPU undervolted?
diff --git a/.github/ISSUE_TEMPLATE/compile_bug_report.md b/.github/ISSUE_TEMPLATE/compile_bug_report.md
new file mode 100644
index 000000000..899ad941f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/compile_bug_report.md
@@ -0,0 +1,35 @@
+---
+name: Compile bug report
+about: You have an issue compiling xmr-stak.
+
+---
+
+`...` is a placeholder for your answers. Please answer each question!
+
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**Which operating system do you use?**
+
+```
+...
+```
+
+**To Reproduce**
+```
+# Please post all commands and the output.
+...
+```
+
+**Additional information.**
+
+```
+# run `cmake -LA .` in the build folder and add the output here
+...
+```
+
+**Feel free to add more information.**
+```
+...
+```
diff --git a/.github/ISSUE_TEMPLATE/execution_bug_report.md b/.github/ISSUE_TEMPLATE/execution_bug_report.md
new file mode 100644
index 000000000..44ac89bf1
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/execution_bug_report.md
@@ -0,0 +1,7 @@
+---
+name: Execution bug report
+about: You have an issue running xmr-stak.
+
+---
+
+**Most execution issues are caused by driver problems. Please use the [xmr-stak sub-reddit](https://www.reddit.com/r/XmrStak/) to ask for help instead of opening an issue here.**
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 000000000..90f5e4f3d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,7 @@
+---
+name: Feature request
+about: Suggest an idea for xmr-stak.
+
+---
+
+**Please explain the feature as well as possible.**
diff --git a/.github/ISSUE_TEMPLATE/tuning_help.md b/.github/ISSUE_TEMPLATE/tuning_help.md
new file mode 100644
index 000000000..40dedef05
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/tuning_help.md
@@ -0,0 +1,7 @@
+---
+name: Need help for optimization.
+about: You need help to optimize your setup.
+
+---
+
+**Please use the [xmr-stak sub-reddit](https://www.reddit.com/r/XmrStak/) to discuss optimizations.**
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 41e993eee..795829e66 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,13 +44,13 @@ endif()
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "${BUILD_TYPE}")
set(XMR-STAK_COMPILE "native" CACHE STRING "select CPU compute architecture")
-set_property(CACHE XMR-STAK_COMPILE PROPERTY STRINGS "native;generic")
+set_property(CACHE XMR-STAK_COMPILE PROPERTY STRINGS "native;generic;dev_release")
if(XMR-STAK_COMPILE STREQUAL "native")
if(NOT CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(CMAKE_CXX_FLAGS "-march=native -mtune=native ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "-march=native -mtune=native ${CMAKE_C_FLAGS}")
endif()
-elseif(XMR-STAK_COMPILE STREQUAL "generic")
+elseif(XMR-STAK_COMPILE STREQUAL "generic" OR XMR-STAK_COMPILE STREQUAL "dev_release")
add_definitions("-DCONF_ENFORCE_OpenCL_1_2=1")
else()
message(FATAL_ERROR "XMR-STAK_COMPILE is set to an unknown value '${XMR-STAK_COMPILE}'")
@@ -496,6 +496,10 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
set(CMAKE_C_FLAGS "-Wl,-z,noexecstack ${CMAKE_C_FLAGS}")
endif()
+if(XMR-STAK_COMPILE STREQUAL "dev_release")
+ add_definitions(-DXMRSTAK_DEV_RELEASE)
+endif()
+
# activate static libgcc and libstdc++ linking
if(CMAKE_LINK_STATIC)
set(BUILD_SHARED_LIBRARIES OFF)
@@ -586,7 +590,16 @@ if(CUDA_FOUND)
)
endif()
- set(CUDA_LIBRARIES ${CUDA_LIB} ${CUDA_NVRTC_LIB} ${CUDA_LIBRARIES})
+ set(CUDA_LIBRARIES ${CUDA_LIB} ${CUDA_LIBRARIES})
+ if(XMR-STAK_COMPILE STREQUAL "dev_release")
+ # do not link nvrtc for linux binaries, cn-r will be disabled
+ if(WIN32)
+ set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_NVRTC_LIB})
+ endif()
+ else()
+ set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_NVRTC_LIB})
+ endif()
+
target_link_libraries(xmrstak_cuda_backend ${CUDA_LIBRARIES})
target_link_libraries(xmrstak_cuda_backend xmr-stak-backend xmr-stak-asm)
endif()
diff --git a/README.md b/README.md
index c890da1a5..2e2eb61fa 100644
--- a/README.md
+++ b/README.md
@@ -1,102 +1,10 @@
-###### fireice-uk's and psychocrypt's
-# XMR-Stak - Cryptonight All-in-One Mining Software
-
-XMR-Stak is a universal Stratum pool miner. This miner supports CPUs, AMD and NVIDIA GPUs and can be used to mine the crypto currencies Monero, Aeon and many more Cryptonight coins.
-
-## HTML reports
-
-
-## Video setup guide on Windows
-
-[
](https://youtu.be/YNMa8NplWus)
-###### Video by Crypto Sewer
-
-## Overview
-* [Features](#features)
-* [Supported altcoins](#supported-altcoins)
-* [Download](#download)
-* [Usage](doc/usage.md)
-* [HowTo Compile](doc/compile.md)
-* [FAQ](doc/FAQ.md)
-* [Developer Donation](#default-developer-donation)
-* [Developer PGP Key's](doc/pgp_keys.md)
-
-## Features
-
-- support all common backends (CPU/x86, AMD-GPU and NVIDIA-GPU)
-- support all common OS (Linux, Windows and macOS)
-- supports algorithm cryptonight for Monero (XMR) and cryptonight-light (AEON)
-- easy to use
- - guided start (no need to edit a config file for the first start)
- - auto-configuration for each backend
-- open source software (GPLv3)
-- TLS support
-- [HTML statistics](doc/usage.md#html-and-json-api-report-configuraton)
-- [JSON API for monitoring](doc/usage.md#html-and-json-api-report-configuraton)
-
-## Supported altcoins
-
-Besides [Monero](https://getmonero.org), following coins can be mined using this miner:
-
-- [Aeon](http://www.aeon.cash)
-- [BBSCoin](https://www.bbscoin.xyz)
-- [BitTube](https://coin.bit.tube/)
-- [Conceal](https://conceal.network)
-- [Graft](https://www.graft.network)
-- [Haven](https://havenprotocol.com)
-- [Lethean](https://lethean.io)
-- [Masari](https://getmasari.org)
-- [Plenteum](https://www.plenteum.com/)
-- [QRL](https://theqrl.org)
-- **[Ryo](https://ryo-currency.com) - Upcoming xmr-stak-gui is sponsored by Ryo**
-- [Stellite](https://stellite.cash/)
-- [TurtleCoin](https://turtlecoin.lol)
-- [Zelerius](https://zelerius.org/)
-- [X-CASH](https://x-network.io/)
-
-Ryo currency is a way for us to implement the ideas that we were unable to in
-Monero. See [here](https://github.com/fireice-uk/cryptonote-speedup-demo/) for details.
-
-If your prefered coin is not listed, you can choose one of the following algorithms:
-- 256Kib scratchpad memory
- - cryptonight_turtle
-- 1MiB scratchpad memory
- - cryptonight_lite
- - cryptonight_lite_v7
- - cryptonight_lite_v7_xor (algorithm used by ipbc)
-- 2MiB scratchpad memory
- - cryptonight
- - cryptonight_gpu (for Ryo's 14th of Feb fork)
- - cryptonight_masari (used in 2018)
- - cryptonight_v7
- - cryptonight_v7_stellite
- - cryptonight_v8
- - cryptonight_v8_double (used by X-CASH)
- - cryptonight_v8_half (used by masari and stellite)
- - cryptonight_v8_reversewaltz (used by graft)
- - cryptonight_v8_zelerius
-- 4MiB scratchpad memory
- - cryptonight_haven
- - cryptonight_heavy
-
-Please note, this list is not complete and is not an endorsement.
-
-## Download
-
-You can find the latest releases and precompiled binaries on GitHub under [Releases](https://github.com/fireice-uk/xmr-stak/releases).
-
-## Default Developer Donation
-
-By default, the miner will donate 2% of the hashpower (2 minutes in 100 minutes) to my pool. If you want to change that, edit [donate-level.hpp](xmrstak/donate-level.hpp) before you build the binaries.
-
-If you want to donate directly to support further development, here is my wallet
-
-fireice-uk:
-```
-4581HhZkQHgZrZjKeCfCJxZff9E3xCgHGF25zABZz7oR71TnbbgiS7sK9jveE6Dx6uMs2LwszDuvQJgRZQotdpHt1fTdDhk
-```
-
-psychocrypt:
-```
-45tcqnJMgd3VqeTznNotiNj4G9PQoK67TGRiHyj6EYSZ31NUbAfs9XdiU5squmZb717iHJLxZv3KfEw8jCYGL5wa19yrVCn
-```
+
+
+
\ No newline at end of file
diff --git a/doc/FAQ.md b/doc/FAQ.md
index f744e3d24..b78ac15cb 100644
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -1,104 +1,27 @@
# FAQ
+To improve our support we created the [Xmr-Stak forum](https://www.reddit.com/r/XmrStak). Check it out if you have a problem, or if you are looking for the most up-to-date config for your card and [guides](https://www.reddit.com/r/XmrStak/wiki/index).
+
## Content Overview
-* ["Obtaining SeLockMemoryPrivilege failed."](#obtaining-selockmemoryprivilege-failed)
-* [VirtualAlloc failed](#virtualalloc-failed)
-* [Error msvcp140.dll and vcruntime140.dll not available](#error-msvcp140dll-and-vcruntime140dll-not-available)
-* [Error: MEMORY ALLOC FAILED: mmap failed](#error-memory-alloc-failed-mmap-failed)
-* [Illegal instruction (core dumped)](#illegal-instruction)
* [Virus Protection Alert](#virus-protection-alert)
* [Change Currency to Mine](#change-currency-to-mine)
* [How can I mine Monero](#how-can-i-mine-monero)
* [Which currency must be chosen if my fork coin is not listed](#which-currency-must-be-chosen-if-my-fork-coin-is-not-listed)
-* [Internal compiler error: Killed (program cc1plus)](#internal-compiler-error)
-
-## "Obtaining SeLockMemoryPrivilege failed."
-
-For professional versions of Windows see [this article](https://msdn.microsoft.com/en-gb/library/ms190730.aspx).
-Make sure to reboot afterwards!
-
-For Windows 7/10 Home:
-
-1) Download and install [Windows Server 2003 Resource Kit Tools](https://www.microsoft.com/en-us/download/details.aspx?id=17657). Ignore any incompatibility warning during installation.
-
-2) Open cmd or PowerShell as an administrator.
-
-3) Use `ntrights -u %USERNAME% +r SeLockMemoryPrivilege` where %USERNAME% is the user that will be running the program.
-
-4) Reboot.
-
-Reference: http://rybkaforum.net/cgi-bin/rybkaforum/topic_show.pl?pid=259791#pid259791
-
-*Warning: Do not download ntrights.exe from any other site other than the offical Microsoft download page.*
-
-## VirtualAlloc failed
-
-If you set up the user rights properly ([see above](https://github.com/fireice-uk/xmr-stak/blob/master/doc/FAQ.md#selockmemoryprivilege-failed)), and your system has 4-8GB of RAM (50%+ use), there is a significant chance that there simply won't be a large enough chunk of contiguous memory because Windows is fairly bad at mitigating memory fragmentation.
-
-If that happens, disable all auto-starting applications and run the miner after a reboot.
-
-## Error msvcp140.dll and vcruntime140.dll not available
-
-Download and install this [runtime package](https://go.microsoft.com/fwlink/?LinkId=746572) from Microsoft. *Warning: Do NOT use "missing dll" sites - dll's are exe files with another name, and it is a fairly safe bet that any dll on a shady site like that will be trojaned. Please download offical runtimes from Microsoft above.*
-
-
-## Error: MEMORY ALLOC FAILED: mmap failed
-
-On Linux you will need to configure large page support and increase your memlock limit (`ulimit -l`).
-
-Never put settings directly into `/etc/sysctl.conf` or `/etc/security/limits.conf` as those are system defaults and can be replaced in upgrades, and custom settings in that file are deprecated in all distros since at least wheezy/trusty (has been illegal in RedHat based distros for longer than that), and will be even more deprecated with systemd (it no longer even reads sysctl.conf, ONLY sysctl.d files, for example - there is a link to the old `/etc/sysctl.conf` for backward compatibility but that can go away at any time). Also adding to `/etc/rc.local` is extra incorrect, systemd does not even use that file anymore (once the sysvinit compatibility layer is gone, rc.local will no longer work).
-
-To check current settings, run `/sbin/sysctl vm.nr_hugepages ; ulimit -l` as whatever user you will run `xmr-stak` as (example shows bad/low sample defaults):
-
- $ /sbin/sysctl vm.nr_hugepages ; ulimit -l
- vm.nr_hugepages = 0
- 16
-
-To set large page support, add the following lines to `/etc/sysctl.d/60-hugepages.conf`:
-
- vm.nr_hugepages=128
-
-You WILL need to run `sudo sysctl --system` for these settings to take effect on your system (or reboot). In some cases (many threads, very large CPU, etc) you may need more than 128 (try 256 if there are still complaints from thread inits)
-
-To increase the memlock (ulimit -l), add following lines to `/etc/security/limits.d/60-memlock.conf`:
-
- * - memlock 262144
- root - memlock 262144
-
-You WILL need to log out and log back in for these settings to take effect on your user (no need to reboot, just relogin in your session).
-Recheck after completing these steps to validate:
-
- $ /sbin/sysctl vm.nr_hugepages ; ulimit -l
- vm.nr_hugepages = 128
- 262144
-
-You can also do it Windows-style and simply run-as-root, but this is NOT recommended for security reasons. Also running as root does not properly get around the `ulimit -l` being large enough (and limits `*` does not apply to `root` either, it must be specified explicitly).
-
-## Illegal Instruction
-
-This typically means you are trying to run it on a CPU that does not have [AES](https://en.wikipedia.org/wiki/AES_instruction_set). This only happens on older version of miner, new version gives better error message (but still wont' work since your CPU doesn't support the required instructions).
-
-## Virus Protection Alert
+### Virus Protection Alert
Some virus protection software flags the miner binary as *malware*. This is a false positive — the software does not contain any malware (and since it is open source, you can verify that yourself!)
If your antivirus software flags **xmr-stak**, it will likely move it to its quarantine area. You may have to whitelist **xmr-stak** in your antivirus.
-## Change Currency to Mine
-
+### Change Currency to Mine
If the miner is compiled for Monero and Aeon than you can change
- the value `currency` in the config *or*
- start the miner with the [command line option](usage.md) `--currency monero` or `--currency aeon7`
- run `xmr-stak --help` to see all supported currencies and algorithms
-## How can I mine Monero
-
+### How can I mine Monero
Set the value `currency` in `pools.txt` to `monero`.
-## Which currency must be chosen if my fork coin is not listed
-
+### Which currency must be chosen if my fork coin is not listed
If your coin you want to mine is not listed please check the documentation of the coin and try to find out if `cryptonight` or `cryptonight-lite` is the used algorithm.
Select one of these generic coin algorithms.
-## Internal compiler error
-
-Seeing `g++: internal compiler error: Killed (program cc1plus)` is probably related to not enough RAM to compile. 1 Gb RAM should be enough (it is on clean Ubuntu 16.04).
diff --git a/doc/README.md b/doc/README.md
new file mode 100644
index 000000000..7a1f13288
--- /dev/null
+++ b/doc/README.md
@@ -0,0 +1,132 @@
+
+
+
+
+
+
+
+## Introduction
+XMR-Stak is a universal open source stratum pool miner. This miner supports CPUs, AMD and NVIDIA GPUs and can be used for mining various crypto currencies: Ryo, Graft, Bittube, Conceal, Haven and many more Cryptonight coins.
+
+## Features overview
+[
](#)
+
+## Supported coins and algorithms
+Xmr-Stak supports various variants of the Cryptonight algorithm. Use one of the following options (type the coin alias either in the `pools.txt` config file or during startup configuration under the `"currency"` parameter, and the miner will pick its variant of the Cryptonight algorithm for mining):
+
+| | | |
+| --- | --- | --- |
+| [BitTube](https://coin.bit.tube/) | [Plenteum](https://www.plenteum.com/) | |
+| [Conceal](https://conceal.network) | [QRL](https://theqrl.org) | |
+| [Graft](https://www.graft.network) | [Ryo](https://ryo-currency.com) | **Atom Wallet Solo mining mode is sponsored by [RYO](https://ryo-currency.com/)** |
+| [Haven](https://havenprotocol.com) | [X-CASH](https://x-network.io/) | |
+| [Lethean](https://lethean.io) | [Zelerius](https://zelerius.org/) | |
+| [Masari](https://getmasari.org) | | |
+
+
+**[Ryo Currency](https://ryo-currency.com)** - is a way for us to implement the ideas that we were unable to in
+Monero. See [here](https://github.com/fireice-uk/cryptonote-speedup-demo/) for details.
+
+If your preferred coin is not listed, you can choose one of the following mining algorithms:
+
+| 256 KiB scratchpad memory | 1 MiB scratchpad memory | 2 MiB scratchpad memory | 4 MiB scratchpad memory |
+| --- | --- | --- | --- |
+| cryptonight_turtle | cryptonight_lite | cryptonight | cryptonight_bittube2 |
+| --- | cryptonight_lite_v7 | cryptonight_gpu | cryptonight_haven |
+| --- | --- | cryptonight_conceal | cryptonight_heavy |
+| --- | --- | cryptonight_r | --- |
+| --- | --- | cryptonight_masari (used in 2018) | --- |
+| --- | --- | cryptonight_v8_reversewaltz | --- |
+| --- | --- | cryptonight_v7 | --- |
+| --- | --- | cryptonight_v8 | --- |
+| --- | --- | cryptonight_v8_half (used by masari) | --- |
+| --- | --- | cryptonight_v8_double (used by X-CASH) | --- |
+| --- | --- | cryptonight_v8_zelerius | --- |
+
+Please note, this list is not complete and is not an endorsement.
+
+
+## Get Miner
+Please note that code is developed on the [dev branch](https://github.com/fireice-uk/xmr-stak/commits/dev), if you want to check out the latest updates, before they are merged on main branch, please refer there. Master branch will always point to a version that we consider stable, so you can download the code by simply typing `git clone https://github.com/fireice-uk/xmr-stak.git`
+
+Also you can find the latest releases and precompiled binaries on GitHub under [releases](https://github.com/fireice-uk/xmr-stak/releases/latest) section.
+
+If you want to compile the miner from source files, navigate to ["how to compile"](compile/compile.md) section of docs or [xmr-stak forum](https://www.reddit.com/r/XmrStak/wiki/guides/startup) where you will find the latest step-by-step instructions.
+
+
+## Start Mining
+The miner has 2 ways of initial configuration: simple and advanced. The simple method prompts the user for a minimum of information. Answer yes/no questions with `y` (or `yes`) or `n` (or `no`):
+
+#### Simple setup:
+* `Use simple setup method?` y
+* `Please enter the currency that you want to mine:` Enter currency or mining algorithm
+* `Enter pool address (pool address:port):` Enter pool connection address:port
+* `Username (wallet address or pool login):` Enter wallet address
+* `Password (mostly empty or x):` press Enter
+* `Does this pool port support TLS/SSL? Use no if unknown. (y/N):` press y or n
+
+#### Advanced setup:
+* `Use simple setup method?` n
+* `Do you want to use the HTTP interface? Unlike the screen display, browser interface is not affected by the GPU lag. If you don't want to use it, please enter 0, otherwise enter port number that the miner should listen on` 5656
+* `Please enter the currency that you want to mine:` Enter currency or mining algorithm
+* `Enter pool address (pool address:port):` Enter pool connection address:port
+* `Username (wallet address or pool login):` Enter wallet address
+* `Password (mostly empty or x):` press Enter
+* `Rig identifier for pool-side statistics (needs pool support). Can be empty:` Enter rig name or press Enter
+* `Does this pool port support TLS/SSL? Use no if unknown. (y/N)` Enter y or n
+* `Do you want to use nicehash on this pool? (y/N)` n
+* `Do you want to use multiple pools? (y/N)` Enter y if you want to set up a backup pool, otherwise n
+
+
+## Additional Guides and Feedback
+[
](https://www.youtube.com/c/xmrstak)
+###### Video by Crypto Sewer
+
+To improve our support we created the [Xmr-Stak forum](https://www.reddit.com/r/XmrStak). Check it out if you have a problem, or if you are looking for the most up-to-date config for your card and [guides](https://www.reddit.com/r/XmrStak/wiki/index).
+
+
+
+## Default Developer Donation
+By default, the miner will donate 2% of the hashpower (2 minutes in 100 minutes) to my pool. If you want to change that, edit [donate-level.hpp](xmrstak/donate-level.hpp) before you build the binaries.
+
+If you want to donate directly to support further development, here is my wallet
+
+fireice-uk:
+```
+4581HhZkQHgZrZjKeCfCJxZff9E3xCgHGF25zABZz7oR71TnbbgiS7sK9jveE6Dx6uMs2LwszDuvQJgRZQotdpHt1fTdDhk
+```
+
+psychocrypt:
+```
+45tcqnJMgd3VqeTznNotiNj4G9PQoK67TGRiHyj6EYSZ31NUbAfs9XdiU5squmZb717iHJLxZv3KfEw8jCYGL5wa19yrVCn
+```
\ No newline at end of file
diff --git a/doc/_img/2ragerx-btn.png b/doc/_img/2ragerx-btn.png
new file mode 100644
index 000000000..1c0edd98c
Binary files /dev/null and b/doc/_img/2ragerx-btn.png differ
diff --git a/doc/_img/2xmr-stak-btn.png b/doc/_img/2xmr-stak-btn.png
new file mode 100644
index 000000000..7626e27c1
Binary files /dev/null and b/doc/_img/2xmr-stak-btn.png differ
diff --git a/doc/_img/YT.png b/doc/_img/YT.png
new file mode 100644
index 000000000..cf7a869a2
Binary files /dev/null and b/doc/_img/YT.png differ
diff --git a/doc/_img/cpu.png b/doc/_img/cpu.png
new file mode 100644
index 000000000..6a370fbc9
Binary files /dev/null and b/doc/_img/cpu.png differ
diff --git a/doc/_img/faq-green.png b/doc/_img/faq-green.png
new file mode 100644
index 000000000..440a855b2
Binary files /dev/null and b/doc/_img/faq-green.png differ
diff --git a/doc/_img/faq.png b/doc/_img/faq.png
new file mode 100644
index 000000000..83167e3c7
Binary files /dev/null and b/doc/_img/faq.png differ
diff --git a/doc/_img/features-xmr-stak.png b/doc/_img/features-xmr-stak.png
new file mode 100644
index 000000000..ef75a3b14
Binary files /dev/null and b/doc/_img/features-xmr-stak.png differ
diff --git a/doc/_img/features.png b/doc/_img/features.png
new file mode 100644
index 000000000..37c877291
Binary files /dev/null and b/doc/_img/features.png differ
diff --git a/doc/_img/fee.png b/doc/_img/fee.png
new file mode 100644
index 000000000..cd3cdaf00
Binary files /dev/null and b/doc/_img/fee.png differ
diff --git a/doc/_img/fine-tuning-green.png b/doc/_img/fine-tuning-green.png
new file mode 100644
index 000000000..b58184bfa
Binary files /dev/null and b/doc/_img/fine-tuning-green.png differ
diff --git a/doc/_img/fine-tuning.png b/doc/_img/fine-tuning.png
new file mode 100644
index 000000000..6b817cffe
Binary files /dev/null and b/doc/_img/fine-tuning.png differ
diff --git a/doc/_img/gpu.png b/doc/_img/gpu.png
new file mode 100644
index 000000000..4d5578007
Binary files /dev/null and b/doc/_img/gpu.png differ
diff --git a/doc/_img/header.png b/doc/_img/header.png
new file mode 100644
index 000000000..8c9eeefad
Binary files /dev/null and b/doc/_img/header.png differ
diff --git a/doc/_img/how-to-compile-green.png b/doc/_img/how-to-compile-green.png
new file mode 100644
index 000000000..e82c8b693
Binary files /dev/null and b/doc/_img/how-to-compile-green.png differ
diff --git a/doc/_img/how-to-compile.png b/doc/_img/how-to-compile.png
new file mode 100644
index 000000000..a54603484
Binary files /dev/null and b/doc/_img/how-to-compile.png differ
diff --git a/doc/_img/html_reports.png b/doc/_img/html_reports.png
new file mode 100644
index 000000000..2d17bc1bf
Binary files /dev/null and b/doc/_img/html_reports.png differ
diff --git a/doc/img/interleave.png b/doc/_img/interleave.png
similarity index 100%
rename from doc/img/interleave.png
rename to doc/_img/interleave.png
diff --git a/doc/_img/menu-donations-green.png b/doc/_img/menu-donations-green.png
new file mode 100644
index 000000000..a299980d3
Binary files /dev/null and b/doc/_img/menu-donations-green.png differ
diff --git a/doc/_img/menu-donations.png b/doc/_img/menu-donations.png
new file mode 100644
index 000000000..f73facf6f
Binary files /dev/null and b/doc/_img/menu-donations.png differ
diff --git a/doc/_img/menu-features-green.png b/doc/_img/menu-features-green.png
new file mode 100644
index 000000000..527d68d4c
Binary files /dev/null and b/doc/_img/menu-features-green.png differ
diff --git a/doc/_img/menu-features.png b/doc/_img/menu-features.png
new file mode 100644
index 000000000..bcf71064d
Binary files /dev/null and b/doc/_img/menu-features.png differ
diff --git a/doc/_img/menu-get-miner-green.png b/doc/_img/menu-get-miner-green.png
new file mode 100644
index 000000000..9e3bd5753
Binary files /dev/null and b/doc/_img/menu-get-miner-green.png differ
diff --git a/doc/_img/menu-get-miner.png b/doc/_img/menu-get-miner.png
new file mode 100644
index 000000000..891a35f16
Binary files /dev/null and b/doc/_img/menu-get-miner.png differ
diff --git a/doc/_img/menu-support-green.png b/doc/_img/menu-support-green.png
new file mode 100644
index 000000000..3db8e76ef
Binary files /dev/null and b/doc/_img/menu-support-green.png differ
diff --git a/doc/_img/menu-support.png b/doc/_img/menu-support.png
new file mode 100644
index 000000000..5cd80e42f
Binary files /dev/null and b/doc/_img/menu-support.png differ
diff --git a/doc/_img/menu-supported-coins-green.png b/doc/_img/menu-supported-coins-green.png
new file mode 100644
index 000000000..8678ea444
Binary files /dev/null and b/doc/_img/menu-supported-coins-green.png differ
diff --git a/doc/_img/menu-supported-coins.png b/doc/_img/menu-supported-coins.png
new file mode 100644
index 000000000..aabc37283
Binary files /dev/null and b/doc/_img/menu-supported-coins.png differ
diff --git a/doc/_img/ragerx-btn.png b/doc/_img/ragerx-btn.png
new file mode 100644
index 000000000..d08e245fc
Binary files /dev/null and b/doc/_img/ragerx-btn.png differ
diff --git a/doc/_img/ragerx.png b/doc/_img/ragerx.png
new file mode 100644
index 000000000..bc2453d2a
Binary files /dev/null and b/doc/_img/ragerx.png differ
diff --git a/doc/_img/rx.png b/doc/_img/rx.png
new file mode 100644
index 000000000..d9c4c3dfa
Binary files /dev/null and b/doc/_img/rx.png differ
diff --git a/doc/_img/split.png b/doc/_img/split.png
new file mode 100644
index 000000000..11a8635b9
Binary files /dev/null and b/doc/_img/split.png differ
diff --git a/doc/_img/stak-yt-cover.jpg b/doc/_img/stak-yt-cover.jpg
new file mode 100644
index 000000000..ff21acebf
Binary files /dev/null and b/doc/_img/stak-yt-cover.jpg differ
diff --git a/doc/_img/troubleshooting-green.png b/doc/_img/troubleshooting-green.png
new file mode 100644
index 000000000..d36cec8b8
Binary files /dev/null and b/doc/_img/troubleshooting-green.png differ
diff --git a/doc/_img/troubleshooting.png b/doc/_img/troubleshooting.png
new file mode 100644
index 000000000..e57eda740
Binary files /dev/null and b/doc/_img/troubleshooting.png differ
diff --git a/doc/_img/usage-green.png b/doc/_img/usage-green.png
new file mode 100644
index 000000000..c60b9a432
Binary files /dev/null and b/doc/_img/usage-green.png differ
diff --git a/doc/_img/usage.png b/doc/_img/usage.png
new file mode 100644
index 000000000..d9421ba66
Binary files /dev/null and b/doc/_img/usage.png differ
diff --git a/doc/_img/xmr-stak-btn-active.png b/doc/_img/xmr-stak-btn-active.png
new file mode 100644
index 000000000..68520be91
Binary files /dev/null and b/doc/_img/xmr-stak-btn-active.png differ
diff --git a/doc/_img/xmr-stak-btn.png b/doc/_img/xmr-stak-btn.png
new file mode 100644
index 000000000..0356f41aa
Binary files /dev/null and b/doc/_img/xmr-stak-btn.png differ
diff --git a/doc/_img/xmr-stak-cpu-connection.png b/doc/_img/xmr-stak-cpu-connection.png
new file mode 100644
index 000000000..d07a8d0a9
Binary files /dev/null and b/doc/_img/xmr-stak-cpu-connection.png differ
diff --git a/doc/_img/xmr-stak-cpu-hashrate.png b/doc/_img/xmr-stak-cpu-hashrate.png
new file mode 100644
index 000000000..488a34825
Binary files /dev/null and b/doc/_img/xmr-stak-cpu-hashrate.png differ
diff --git a/doc/_img/xmr-stak-cpu-results.png b/doc/_img/xmr-stak-cpu-results.png
new file mode 100644
index 000000000..7244f9579
Binary files /dev/null and b/doc/_img/xmr-stak-cpu-results.png differ
diff --git a/doc/_img/xmr-stak-rx-btn-inactive.png b/doc/_img/xmr-stak-rx-btn-inactive.png
new file mode 100644
index 000000000..1644a9505
Binary files /dev/null and b/doc/_img/xmr-stak-rx-btn-inactive.png differ
diff --git a/doc/_img/xmr-stak-rx-btn.png b/doc/_img/xmr-stak-rx-btn.png
new file mode 100644
index 000000000..39f0c87f7
Binary files /dev/null and b/doc/_img/xmr-stak-rx-btn.png differ
diff --git a/doc/_img/xmrig.png b/doc/_img/xmrig.png
new file mode 100644
index 000000000..cdeaa4501
Binary files /dev/null and b/doc/_img/xmrig.png differ
diff --git a/doc/compile.md b/doc/compile/compile.md
similarity index 100%
rename from doc/compile.md
rename to doc/compile/compile.md
diff --git a/doc/compile_FreeBSD.md b/doc/compile/compile_FreeBSD.md
similarity index 100%
rename from doc/compile_FreeBSD.md
rename to doc/compile/compile_FreeBSD.md
diff --git a/doc/compile_Linux.md b/doc/compile/compile_Linux.md
similarity index 100%
rename from doc/compile_Linux.md
rename to doc/compile/compile_Linux.md
diff --git a/doc/compile_Windows.md b/doc/compile/compile_Windows.md
similarity index 92%
rename from doc/compile_Windows.md
rename to doc/compile/compile_Windows.md
index 64d68bab1..37925576a 100644
--- a/doc/compile_Windows.md
+++ b/doc/compile/compile_Windows.md
@@ -111,6 +111,15 @@ Do not follow old information that you need the AMD APP SDK. AMD has removed the
cd bin\Release
+ copy C:\xmr-stak-dep\openssl\bin\* .
+ ```
+- To exclude some of the dependencies, use the command below, which sets the corresponding `*_ENABLE` options to `OFF`
+ ```
+ cmake -G "Visual Studio 15 2017 Win64" -T v141,host=x64 -DCMAKE_BUILD_TYPE=Release -DMICROHTTPD_ENABLE=OFF -DCUDA_ENABLE=OFF -DOpenCL_ENABLE=OFF ..
+ cmake --build . --config Release --target clean
+ cmake --build . --config Release --target install
+ cd bin\Release
+
copy C:\xmr-stak-dep\openssl\bin\* .
```
- Miner is by default compiled for NVIDIA GPUs (if CUDA is installed), AMD GPUs (if the AMD OCL-SDK_light is installed) and CPUs.
diff --git a/doc/compile_macOS.md b/doc/compile/compile_macOS.md
similarity index 100%
rename from doc/compile_macOS.md
rename to doc/compile/compile_macOS.md
diff --git a/doc/troubleshooting.md b/doc/troubleshooting.md
new file mode 100644
index 000000000..fb0dc88ce
--- /dev/null
+++ b/doc/troubleshooting.md
@@ -0,0 +1,119 @@
+# Troubleshooting
+To improve our support we created [Xmr-Stak forum](https://www.reddit.com/r/XmrStak). Check it out if you have a problem, or you are looking for most up to date config for your card and [guides](https://www.reddit.com/r/XmrStak/wiki/index).
+
+
+### 1. CL_MEM_OBJECT_ALLOCATION_FAILURE when calling clEnqueue
+This error means that the GPU can't allocate the amount of memory requested by your config. There are 3 known solutions to this problem:
+
+* Check whether you are accidentally using too many threads per GPU (check the *index* value in amd.txt)
+* Your `intensity` value in amd.txt is too high - try reducing it (keeping it a multiple of `worksize`)
+* If you are using Windows - you may not have enough virtual memory in the system. Add virtual memory (don't be afraid if it goes up to 60 GB per 6 GPU rig)
+
+
+
+### 2. GPU is not detected
+Check if you have antivirus software turned on. If yes - it could delete some .dll files (for example xmrstak\_cuda\_backend\_cuda10\_0.dll)
+
+
+
+### 3. Illegal Instruction
+This typically means you are trying to run it on a CPU that does not have [AES](https://en.wikipedia.org/wiki/AES_instruction_set). This only happens on older versions of the miner; newer versions give a better error message (but still won't work since your CPU doesn't support the required instructions).
+
+
+
+### 4. Internal compiler error
+Seeing `g++: internal compiler error: Killed (program cc1plus)` is probably related to not having enough RAM to compile. 1 GB of RAM should be enough (on a clean Ubuntu 16.04).
+
+
+
+### 5. Invalid Result GPU ID
+This error can have several causes. Here are the most common ones, with known successful ways to fix them:
+
+* **Hardware problem: overclock/overvoltage/undervoltage** \- try to use stock clocks and voltages.
+* **Software problem: drivers** \- try to change driver versions (for AMD gpu most commonly stable versions are: blockchain drivers or 18.6.1)
+* **Miner misconfiguration** \- try to reduce `intensity` (if AMD) or `threads` or `bfactor` (if NVIDIA) in config file.
+
+If you still receive these errors, [please report the issue](https://github.com/fireice-uk/xmr-stak/issues).
+
+
+### 6. IP is banned
+The pool has banned your IP. This can be caused by several reasons:
+
+* You selected wrong pool port or the static diff is too low. (Learn more about [pool ports and diff](https://www.reddit.com/r/XmrStak/wiki/guides/other-questions#wiki_1._pool_ports_and_difficulty))
+* You had too many [invalid shares \[8\]](https://www.reddit.com/r/XmrStak/wiki/troubleshooting#wiki_8._invalid_result_gpu_id)
+
+
+
+### 7. MEMORY ALLOC FAILED: mmap failed
+On Linux you will need to configure large page support and increase your memlock limit (`ulimit -l`).
+
+Never put settings directly into `/etc/sysctl.conf` or `/etc/security/limits.conf` as those are system defaults and can be replaced in upgrades, and custom settings in that file are deprecated in all distros since at least wheezy/trusty (has been illegal in RedHat based distros for longer than that), and will be even more deprecated with systemd (it no longer even reads sysctl.conf, ONLY sysctl.d files, for example - there is a link to the old `/etc/sysctl.conf` for backward compatibility but that can go away at any time). Also adding to `/etc/rc.local` is extra incorrect, systemd does not even use that file anymore (once the sysvinit compatibility layer is gone, rc.local will no longer work). To check current settings, run `/sbin/sysctl vm.nr_hugepages ; ulimit -l` as whatever user you will run xmr-stak as (example shows bad/low sample defaults):
+
+ $ /sbin/sysctl vm.nr_hugepages ; ulimit -l vm.nr_hugepages = 0 16
+
+To set large page support, add the following lines to `/etc/sysctl.d/60-hugepages.conf`:
+
+ vm.nr_hugepages=128
+
+You WILL need to run `sudo sysctl --system` for these settings to take effect on your system (or reboot). In some cases (many threads, very large CPU, etc) you may need more than 128 (try 256 if there are still complaints from thread inits)
+
+To increase the memlock (`ulimit -l`), add following lines to `/etc/security/limits.d/60-memlock.conf`:
+
+ * - memlock 262144 root - memlock 262144
+
+You WILL need to log out and log back in for these settings to take effect on your user (no need to reboot, just relogin in your session). Recheck after completing these steps to validate:
+
+ $ /sbin/sysctl vm.nr_hugepages ; ulimit -l vm.nr_hugepages = 128 262144
+
+You can also do it Windows-style and simply run-as-root, but this is NOT recommended for security reasons. Also running as root does not properly get around the `ulimit -l` being large enough (and limits `*` does not apply to `root` either, it must be specified explicitly).
+
+
+### 8. msvcp140.dll and vcruntime140.dll are not available
+Download and install this [runtime package](https://go.microsoft.com/fwlink/?LinkId=746572) from Microsoft.
+
+>***Warning***: *Do NOT use "missing dll" sites - dll's are exe files with another name, and it is a fairly safe bet that any dll on a shady site like that will be trojaned. Please download official runtimes from Microsoft above.*
+
+
+
+### 9. Obtaining SeLockMemoryPrivilege failed.
+For professional versions of Windows see [this article](https://msdn.microsoft.com/en-gb/library/ms190730.aspx). Make sure to reboot afterwards!
+
+**For Windows 7/10 Home:**
+
+1. Download and install [Windows Server 2003 Resource Kit Tools](https://www.microsoft.com/en-us/download/details.aspx?id=17657). Ignore any incompatibility warning during installation.
+2. Open cmd or PowerShell as an administrator.
+3. Use `ntrights -u %USERNAME% +r SeLockMemoryPrivilege` where `%USERNAME%` is the user that will be running the program.
+4. Reboot.
+
+Reference: [http://rybkaforum.net/cgi-bin/rybkaforum/topic\_show.pl?pid=259791#pid259791](http://rybkaforum.net/cgi-bin/rybkaforum/topic_show.pl?pid=259791#pid259791)
+
+*Warning: Do not download ntrights.exe from any site other than the official Microsoft download page.*
+
+
+### 10. Share rejected - Low diff share
+Check if a coin that you are mining has changed algorithm in one of its forks and you use right hashing algorithm in pools.txt (parameter: `currency`).
+
+
+
+### 11. VirtualAlloc failed
+If you set up the user rights properly ([see issue #7](https://www.reddit.com/r/XmrStak/wiki/troubleshooting#wiki_7._memory_alloc_failed.3A_mmap_failed)), and your system has 4-8GB of RAM (and 50%+ is in use), there is a significant chance that there simply won't be a large enough chunk of contiguous memory because Windows is fairly bad at mitigating memory fragmentation.
+
+If that happens, disable all auto-starting applications and run the miner after a reboot.
+
+
+### 12. (Ubuntu compiling) - Nvidia insufficient driver
+If you have this error after compiling xmr-stak in Ubuntu - make sure you have the latest drivers and not X.org.X Nouveau or v390. Install them manually or with [cuda package](https://www.reddit.com/r/XmrStak/wiki/guides/startup#wiki_2._ubuntu_18.10_setup_.2B_nvidia_.28compiling_from_source.29)
+
+
+
+### 13. (Ubuntu compiling) - Could NOT find OpenCL (missing: OpenCL_LIBRARY OpenCL_INCLUDE_DIR) Cmake error at CmakeLists.txt
+When [compiling in Ubuntu with Nvidia](https://www.reddit.com/r/XmrStak/wiki/guides/startup#wiki_2._ubuntu_18.10_setup_.2B_nvidia_.28compiling_from_source.29) devices, and running `cmake ..` command add additional param that disables OpenCL: `cmake .. -DOpenCL_ENABLE=OFF`
+
+
+
+### 14. (Ubuntu compiling) - gcc v8 is not supported
+Cuda 10 ships with gcc and g++ ver.8 which is not supported. Make sure you [set gcc and g++ to v6](https://www.reddit.com/r/XmrStak/wiki/guides/startup#wiki_2.2_compiling) before compiling. (step 2.2.6)
+
+
+
+
diff --git a/doc/tuning.md b/doc/tuning.md
index 6d07d4ddc..a504b85ef 100644
--- a/doc/tuning.md
+++ b/doc/tuning.md
@@ -3,41 +3,59 @@
## Content Overview
* [Benchmark](#benchmark)
* [Windows](#windows)
+* [Managing GPUs](#managing-gpus)
* [NVIDIA Backend](#nvidia-backend)
* [Choose Value for `threads` and `blocks`](#choose-value-for-threads-and-blocks)
* [Add more GPUs](#add-more-gpus)
* [AMD Backend](#amd-backend)
* [Choose `intensity` and `worksize`](#choose-intensity-and-worksize)
- * [Add more GPUs](#add-more-gpus)
- * [Two Threads per GPU](two-threads-per-gpu)
- * [Interleave Tuning](interleave-tuning )
+ * [Two Threads per GPU](#two-threads-per-gpu)
+ * [Interleave Tuning](#interleave-tuning)
* [disable comp_mode](#disable-comp_mode)
- * [change the scratchpad memory pattern](change-the-scratchpad-memory-pattern)
+ * [Auto-tune](#auto-tune)
+ * [Change the scratchpad memory pattern](#change-the-scratchpad-memory-pattern)
* [Increase Memory Pool](#increase-memory-pool)
* [Scratchpad Indexing](#scratchpad-indexing)
* [CPU Backend](#cpu-backend)
* [Choose Value for `low_power_mode`](#choose-value-for-low_power_mode)
## Benchmark
-To benchmark the miner speed there are two ways.
- - Mine against a pool end press the key `h` after 30 sec to see the hash report.
- - Start the miner with the cli option `--benchmark BLOCKVERSION`. The miner will not connect to any pool and performs a 60sec performance benchmark with all enabled back-ends.
+You can benchmark the miner in two ways:
+ - Edit `config.txt`, set `verbose_level` to 4 and `h_print_time` to 30, and start the miner. You will see a hash report every 30 seconds.
+ - Start the miner with the cli option `--benchmark BLOCKVERSION`. The miner will not connect to any pool and performs a 60sec performance benchmark with all enabled backends.
## Windows
"Run As Administrator" prompt (UAC) confirmation is needed to use large pages on Windows 7.
On Windows 10 it is only needed once to set up the account to use them.
Disable the dialog with the command line option `--noUAC`
+### Managing GPUs
+
+To turn a GPU on or off you need to add/remove its config set in `gpu_threads_conf`.
+`index` is the number of the GPU; the index order does not follow the order from `nvidia-smi` or the order shown in Windows.
+
+```
+"gpu_threads_conf" :
+[
+ { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0,
+ "affine_to_cpu" : false, "sync_mode" : 3, "mem_mode" : 1,
+ },
+ { "index" : 1, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0,
+ "affine_to_cpu" : false, "sync_mode" : 3, "mem_mode" : 1,
+ },
+],
+```
+
## NVIDIA Backend
By default the NVIDIA backend can be tuned in the config file `nvidia.txt`
### Choose Value for `threads` and `blocks`
-The optimal parameter for the `threads` and `blocks` option in `config.txt` depend on your GPU.
-For all GPU's with a compute capability `>=2.0` and `<6.0` there is a restriction of the amount of RAM that can be used for the mining algorithm.
-The maximum RAM that can be used must be less than 2GB (e.g. GTX TITAN) or 1GB (e.g. GTX 750-TI).
-The amount of RAM used for mining can be changed with `"threads" : T, "blocks : B"`.
+The optimal values for the `threads` and `blocks` parameters in `nvidia.txt` depend on your GPU model and selected mining algorithm.
+For all GPU's with a compute capability `>=2.0` and `<6.0` there is a restriction of the amount of vRAM that can be used for the mining algorithm.
+The maximum vRAM that can be used must be less than 2GB (e.g. GTX TITAN) or 1GB (e.g. GTX 750-TI).
+The amount of vRAM used for mining can be changed with `"threads" : T, "blocks" : B`.
- `T` = threads used per block
- `B` = CUDA blocks started (should be a multiple of the multiprocessors `M` on the GPU)
@@ -48,23 +66,6 @@ and full fill all restrictions `16 * 48 * 2 = 1536` and `48 mod 24 = 0`.
The memory limit for NVIDIA Pascal GPUs is `16` GiB if the newest CUDA driver is used.
-### Add More GPUs
-
-To add a new GPU you need to add a new config set to `gpu_threads_conf`.
-`index` is the number of the gpu, the index order not follow the order from `nvidia-smi` or the order shown in windows.
-
-```
-"gpu_threads_conf" :
-[
- { "index" : 0, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0,
- "affine_to_cpu" : false, "sync_mode" : 3, "mem_mode" : 1,
- },
- { "index" : 1, "threads" : 17, "blocks" : 60, "bfactor" : 0, "bsleep" : 0,
- "affine_to_cpu" : false, "sync_mode" : 3, "mem_mode" : 1,
- },
-],
-```
-
## AMD Backend
By default the AMD backend can be tuned in the config file `amd.txt`
@@ -75,38 +76,16 @@ Intensity means the number of threads used to mine. The maximum intensity is GPU
`worksize` is the number of threads working together to increase the miner performance.
In the most cases a `worksize` of `16` or `8` is optimal.
-### Add More GPUs
-
-To add a new GPU you need to add a new config set to `gpu_threads_conf`. `index` is the OpenCL index of the gpu.
-`platform_index`is the index of the OpenCL platform (Intel / AMD / Nvidia).
-If you are unsure of either GPU or platform index value, you can use `clinfo` tool that comes with AMD APP SDK to dump the values.
-
-```
-"gpu_threads_conf" :
-[
- { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,
- "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true,
- "interleave" : 40
- },
- { "index" : 1, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,
- "strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true,
- "interleave" : 40
- },
-],
-
-"platform_index" : 0,
-```
### Two Threads per GPU
-Some GPUs like AMD Vega can mine faster if two threads are using the same GPU.
-Use the auto generated config as base and repeat the config entry for a GPU.
-If the attribute `index` is used twice than two threads will use one GPU.
-Take care that the required memory usage on the GPU will also double.
-Therefore adjust your intensity by hand.
+Some AMD GPUs can mine faster on some mining algorithms if two threads are using the same GPU.
+If you have `amd.txt` config with one `index` entry per GPU - duplicate these entries to run 2 threads per GPU.
+*Notice*: Keep in mind that the memory usage on the GPU will also double - therefore adjust your `intensity` by hand.
+Example of a 2-threaded config:
 ```
-"gpu_threads_conf" :
+"gpu_threads_conf" :
[
{ "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,
"strided_index" : true, "mem_chunk" : 2, "unroll" : 8, "comp_mode" : true,
@@ -123,43 +102,88 @@ Therefore adjust your intensity by hand.
### Interleave Tuning
-Interleave controls when a worker thread is starting to calculate a bunch of hashes
-if two worker threads are used to utilize one GPU.
-This option has no effect if only one worker thread is used per GPU.
+**Note 1:** Interleaving is available for AMD GPUs only.
-
+**Note 2:** Interleaving has an effect only when 2+ threads are used per GPU.
-Interleave defines how long a thread needs to wait to start the next hash calculation relative to the last started worker thread.
-To choose a interleave value larger than 50% makes no sense because than the gpu will not be utilized well enough.
-In the most cases the default 40 is a good value but on some systems e.g. Linux Rocm 1.9.1 driver with RX5XX you need to adjust the value.
-If you get many interleave message in a row (over 1 minute) you should adjust the value.
+Interleave controls when a worker thread starts to calculate hashes if two worker threads are used to utilize one GPU. This parameter is designed to reduce the total idle periods of the GPU while mining.
-```
-OpenCL Interleave 0|1: 642/2400.50 ms - 30.1
-OpenCL Interleave 0|0: 355/2265.05 ms - 30.2
-OpenCL Interleave 0|1: 221/2215.65 ms - 30.2
-```
+
-description:
-```
-|: / ms -
+**1.Reading and understanding the log:**
-```
-`last delay` should gou slowly to 0.
-If it goes down and than jumps to a very large value multiple times within a minute you should reduce the intensity by 5.
-The `intensity value` will automatically go up and down within the range of +-5% to adjust kernel run-time fluctuations.
-Automatic adjustment is disabled as long as `auto-tuning` is active and will be started after it is finished.
-If `last delay` goes down to 10ms and the messages stops and repeated from time to time with delays up to 15ms you will have already a good value.
+`OpenCL Interleave 0|0: 265/1372.30 ms - 40.1`
+`OpenCL Interleave 0|1: 125/1330.10 ms - 40.2`
+`OpenCL Interleave 0|0: 74/1323.67 ms - 40.2`
+`OpenCL Interleave 0|1: 43/1312.01 ms - 40.2`
+`OpenCL Interleave 0|1: 16/1283.20 ms - 40.2`
+
+Reads as:
+`OpenCL Interleave GPU ID|Thread ID: last delay/average calculation time per hash bunch - interleave value`
+
+
+**2.Do I need to adjust it?**
+In general, interleaving can be used as an indicator of how well 2-threading works with your GPU at the current set of settings (including GPU power profile, miner settings, drivers). The default value `"interleave" : 40` in `amd.txt` works well in most cases.
+
+2.1 Optimal setup: After you started mining you have `last delay` value reduced over time to minimum possible value and stays at it. The best scenario is when `last delay` value settled around 10-15 and interleave messages appear rarely. The reported hashrate will be close to max. of GPU capabilities.
+
+2.2 Not optimal setup: After you started mining you have `last delay` value reducing over time and jumping back to high values, or rising after the start of mining. The reported hashrate will be lower compared to max. possible.
+
+**3.Adjusting Interleaving and optimizing hashrate**
+**Note:** setting the `interleave` value in amd.txt higher than 50 makes no practical sense.
+
+If you face the situation described in 2.2, keep in mind that it can have several possible causes. Address them one at a time, restarting the miner after each attempt and checking the logs and hashrate:
+
+- Miner misconfiguration 1: Adjust "interleave" in amd.txt by couple points +/-
+- Miner misconfiguration 2: Adjust "intensity" in amd.txt by setting lower value (multiple to "worksize" value)
+- GPU overclock: Reduce overclock/overvoltage values of GPU memory and GPU core
+- Drivers issue: Try [reinstalling your drivers](https://www.amd.com/en/support) (there are 3 possible options to try: blockchain drivers, v18.6.1, or newest version)
+
### disable comp_mode
`comp_mode` means compatibility mode and removes some checks in compute kernel those takes care that the miner can be used on a wide range of AMD/OpenCL GPU devices.
To avoid miner crashes the `intensity` should be a multiple of `worksize` if `comp_mode` is `false`.
-### change the scratchpad memory pattern
+### Auto-tune
+
+**Note:** This feature is available for AMD GPUs only.
+
+Auto-tuning feature may help you to speed up seek process of finding optimal intensity for your GPU (vs manual check, in case if you want to compare autogenerated intensity with the most performing value).
+
+When set, miner will perform several (defined by user) rounds per each intensity check of given range. When setting number of rounds - keep in mind that you want to have a balance of speed and reliability of the checking.
+
+After setting number of checks per intensity value, you will need to set ceiling value after which the miner will stop checking intensity values.
+
+**1.Enabling and configuring auto-tune**
+Navigate to amd.txt config file in miner's folder, find (in the bottom part) parameter "auto_tune" : 0, and set it to "auto_tune" : 6, (6-10 rounds per intensity value suits most cases.)
+Set autogenerated value of "intensity" : X, for each thread in amd.txt to slightly higher level (e.g. from 890 to 1000)
+Start xmr-stak.exe
+
+**2. Reading and understanding the log**
+Here is an example of log for 1 GPU with 2 threads (your values will vary):
+`OpenCL 0|0: auto-tune validate intensity 848|840`
+`OpenCL 0|1: auto-tune validate intensity 848|840`
+`OpenCL 0|0: auto-tune validate intensity 856|848`
+`OpenCL 0|1: auto-tune validate intensity 856|848`
+Reads as: `OpenCL GPU ID|Thread ID auto-tune validate intensity Currently checked value|last successfully checked value`
+
+After the checking, you will see
+
+`OpenCL 0|0: lock intensity at 896`
+`OpenCL 0|1: lock intensity at 896`
+Write down these locked intensity values and stop miner.
+
+**3. Finalizing setup**
+Set "auto_tune" value (step 1.1) in `amd.txt` back to "auto_tune" : 0,
+Enter locked intensity values from step 2.
+Start miner.
+
+### Change the scratchpad memory pattern
By changing `strided_index` to `2` the number of contiguous elements (a 16 byte) for one miner thread can be fine tuned with the option `mem_chunk`.
+
### Increase Memory Pool
By setting the following environment variables before the miner is started OpenCl allows the miner to more threads.
diff --git a/doc/usage.md b/doc/usage.md
index 82d26dcc5..800ff6949 100644
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -1,9 +1,9 @@
-# HowTo Use xmr-stak
+# HowTo Use Xmr-Stak
## Content Overview
-* [Configuration](#configuration)
+* [Configurations](#configurations)
* [Usage on Windows](#usage-on-windows)
-* [Usage on Linux](#usage-on-linux)
+* [Usage on Linux & macOS](#usage-on-linux--macos)
* [Command Line Options](#command-line-options)
* [Use different backends](#use-different-backends)
* [HTML and JSON API report configuraton](#html-and-json-api-report-configuraton)
@@ -77,6 +77,6 @@ Debug the docker image by getting inside:
docker run --entrypoint=/bin/bash --rm -it -u $(id -u):$(id -g) --name fireice-uk/xmr-stak -v "$PWD":/mnt xmr-stak
```
-## HTML and JSON API report configuraton
+## HTML and JSON API report configuration
To configure the reports shown on the [README](../README.md) side you need to edit the httpd_port variable. Then enable wifi on your phone and navigate to [miner ip address]:[httpd_port] in your phone browser. If you want to use the data in scripts, you can get the JSON version of the data at url [miner ip address]:[httpd_port]/api.json
diff --git a/xmrstak/backend/amd/OclCryptonightR_gen.cpp b/xmrstak/backend/amd/OclCryptonightR_gen.cpp
index ccb836e41..2a60c46d9 100644
--- a/xmrstak/backend/amd/OclCryptonightR_gen.cpp
+++ b/xmrstak/backend/amd/OclCryptonightR_gen.cpp
@@ -1,19 +1,18 @@
-#include
-#include
-#include
#include
+#include
+#include
+#include
#include
-
#include "xmrstak/backend/amd/OclCryptonightR_gen.hpp"
#include "xmrstak/backend/cpu/crypto/variant4_random_math.h"
-#include "xmrstak/misc/console.hpp"
#include "xmrstak/cpputil/read_write_lock.h"
+#include "xmrstak/misc/console.hpp"
#include
-#include
#include
-
+#include
+#include
namespace xmrstak
{
@@ -22,16 +21,16 @@ namespace amd
static std::string get_code(const V4_Instruction* code, int code_size)
{
- std::stringstream s;
+ std::stringstream s;
- for (int i = 0; i < code_size; ++i)
+ for(int i = 0; i < code_size; ++i)
{
const V4_Instruction inst = code[i];
const uint32_t a = inst.dst_index;
const uint32_t b = inst.src_index;
- switch (inst.opcode)
+ switch(inst.opcode)
{
case MUL:
s << 'r' << a << "*=r" << b << ';';
@@ -58,37 +57,39 @@ static std::string get_code(const V4_Instruction* code, int code_size)
s << '\n';
}
- return s.str();
+ return s.str();
}
struct CacheEntry
{
- CacheEntry(xmrstak_algo algo, uint64_t height, size_t deviceIdx, cl_program program) :
- algo(algo),
- height(height),
- deviceIdx(deviceIdx),
- program(program)
- {}
-
- xmrstak_algo algo;
- uint64_t height;
- size_t deviceIdx;
- cl_program program;
+ CacheEntry(xmrstak_algo algo, uint64_t height_offset, size_t deviceIdx, cl_program program) :
+ algo(algo),
+ height_offset(height_offset),
+ deviceIdx(deviceIdx),
+ program(program)
+ {
+ }
+
+ xmrstak_algo algo;
+ uint64_t height_offset;
+ size_t deviceIdx;
+ cl_program program;
};
struct BackgroundTaskBase
{
- virtual ~BackgroundTaskBase() {}
- virtual void exec() = 0;
+ virtual ~BackgroundTaskBase() {}
+ virtual void exec() = 0;
};
-template
+template
struct BackgroundTask : public BackgroundTaskBase
{
- BackgroundTask(T&& func) : m_func(std::move(func)) {}
- void exec() override { m_func(); }
+ BackgroundTask(T&& func) :
+ m_func(std::move(func)) {}
+ void exec() override { m_func(); }
- T m_func;
+ T m_func;
};
static ::cpputil::RWLock CryptonightR_cache_mutex;
@@ -99,94 +100,113 @@ static std::mutex background_tasks_mutex;
static std::vector background_tasks;
static std::thread* background_thread = nullptr;
+static cl_program search_program(
+ const GpuContext* ctx,
+ xmrstak_algo algo,
+ uint64_t height_offset,
+ bool lock_cache = true)
+{
+ if(lock_cache)
+ CryptonightR_cache_mutex.ReadLock();
+
+ // Check if the cache has this program
+ for(const CacheEntry& entry : CryptonightR_cache)
+ {
+ if((entry.algo == algo) && (entry.height_offset == height_offset) && (entry.deviceIdx == ctx->deviceIdx))
+ {
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height_offset %llu found in cache", height_offset);
+ auto result = entry.program;
+ if(lock_cache)
+ CryptonightR_cache_mutex.UnLock();
+ return result;
+ }
+ }
+ if(lock_cache)
+ CryptonightR_cache_mutex.UnLock();
+
+ return nullptr;
+}
+
static void background_thread_proc()
{
- std::vector tasks;
- for (;;) {
- tasks.clear();
- {
- std::lock_guard g(background_tasks_mutex);
- background_tasks.swap(tasks);
- }
-
- for (BackgroundTaskBase* task : tasks) {
- task->exec();
- delete task;
- }
+ std::vector tasks;
+ for(;;)
+ {
+ tasks.clear();
+ {
+ std::lock_guard g(background_tasks_mutex);
+ background_tasks.swap(tasks);
+ }
+
+ for(BackgroundTaskBase* task : tasks)
+ {
+ task->exec();
+ delete task;
+ }
std::this_thread::sleep_for(std::chrono::milliseconds(500));
- }
+ }
}
-template
+template
static void background_exec(T&& func)
{
- BackgroundTaskBase* task = new BackgroundTask(std::move(func));
+ BackgroundTaskBase* task = new BackgroundTask(std::move(func));
- std::lock_guard g(background_tasks_mutex);
- background_tasks.push_back(task);
- if (!background_thread) {
- background_thread = new std::thread(background_thread_proc);
- }
+ std::lock_guard g(background_tasks_mutex);
+ background_tasks.push_back(task);
+ if(!background_thread)
+ {
+ background_thread = new std::thread(background_thread_proc);
+ }
}
static cl_program CryptonightR_build_program(
- const GpuContext* ctx,
- xmrstak_algo algo,
- uint64_t height,
- uint32_t precompile_count,
- std::string source_code,
- std::string options)
+ const GpuContext* ctx,
+ xmrstak_algo algo,
+ uint64_t height_offset,
+ uint64_t height_chunk_size,
+ uint32_t precompile_count,
+ std::string source_code,
+ std::string options)
{
- std::vector old_programs;
- old_programs.reserve(32);
- {
+ std::vector old_programs;
+ old_programs.reserve(32);
+ {
CryptonightR_cache_mutex.WriteLock();
- // Remove old programs from cache
- for(size_t i = 0; i < CryptonightR_cache.size();)
- {
- const CacheEntry& entry = CryptonightR_cache[i];
- if ((entry.algo == algo) && (entry.height + 2 + precompile_count < height))
- {
- printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu released (old program)", entry.height);
- old_programs.push_back(entry.program);
- CryptonightR_cache[i] = std::move(CryptonightR_cache.back());
- CryptonightR_cache.pop_back();
- }
- else
- {
- ++i;
- }
- }
+ // Remove old programs from cache
+ for(size_t i = 0; i < CryptonightR_cache.size();)
+ {
+ const CacheEntry& entry = CryptonightR_cache[i];
+ if((entry.algo == algo) && (entry.height_offset + (2 + precompile_count) * height_chunk_size < height_offset))
+ {
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height_offset %llu released (old program)", entry.height_offset);
+ old_programs.push_back(entry.program);
+ CryptonightR_cache[i] = std::move(CryptonightR_cache.back());
+ CryptonightR_cache.pop_back();
+ }
+ else
+ {
+ ++i;
+ }
+ }
CryptonightR_cache_mutex.UnLock();
- }
-
- for(cl_program p : old_programs) {
- clReleaseProgram(p);
- }
+ }
- std::lock_guard g1(CryptonightR_build_mutex);
+ for(cl_program p : old_programs)
+ {
+ clReleaseProgram(p);
+ }
- cl_program program = nullptr;
- {
- CryptonightR_cache_mutex.ReadLock();
+ std::lock_guard g1(CryptonightR_build_mutex);
- // Check if the cache already has this program (some other thread might have added it first)
- for (const CacheEntry& entry : CryptonightR_cache)
- {
- if ((entry.algo == algo) && (entry.height == height) && (entry.deviceIdx == ctx->deviceIdx))
- {
- program = entry.program;
- break;
- }
- }
- CryptonightR_cache_mutex.UnLock();
- }
+ cl_program program = search_program(ctx, algo, height_offset);
- if (program) {
- return program;
- }
+ if(program)
+ {
+ return program;
+ }
cl_int ret;
const char* source = source_code.c_str();
@@ -194,7 +214,7 @@ static cl_program CryptonightR_build_program(
program = clCreateProgramWithSource(ctx->opencl_ctx, 1, (const char**)&source, NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L0,"Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret));
+ printer::inst()->print_msg(L0, "Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret));
return program;
}
@@ -202,11 +222,11 @@ static cl_program CryptonightR_build_program(
if(ret != CL_SUCCESS)
{
size_t len;
- printer::inst()->print_msg(L0,"Error %s when calling clBuildProgram.", err_to_str(ret));
+ printer::inst()->print_msg(L0, "Error %s when calling clBuildProgram.", err_to_str(ret));
if((ret = clGetProgramBuildInfo(program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L0,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret));
+ printer::inst()->print_msg(L0, "Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret));
return program;
}
@@ -216,12 +236,12 @@ static cl_program CryptonightR_build_program(
if((ret = clGetProgramBuildInfo(program, ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS)
{
free(BuildLog);
- printer::inst()->print_msg(L0,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret));
+ printer::inst()->print_msg(L0, "Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret));
return program;
}
printer::inst()->print_str("Build log:\n");
- std::cerr<DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L0,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret));
+ printer::inst()->print_msg(L0, "Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret));
return program;
}
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
- }
- while(status == CL_BUILD_IN_PROGRESS);
+ } while(status == CL_BUILD_IN_PROGRESS);
+ CryptonightR_cache_mutex.WriteLock();
+ auto cached_program = search_program(ctx, algo, height_offset, false);
- printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu compiled", height);
+ if(cached_program)
+ {
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: release already existing program %llu", height_offset);
+ clReleaseProgram(program);
+ program = cached_program;
+ }
+ else
+ {
+ CryptonightR_cache.emplace_back(algo, height_offset, ctx->deviceIdx, program);
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: cache compiled program for height_offset %llu", height_offset);
+ }
- CryptonightR_cache_mutex.WriteLock();
- CryptonightR_cache.emplace_back(algo, height, ctx->deviceIdx, program);
CryptonightR_cache_mutex.UnLock();
- return program;
+ return program;
}
-cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t height, uint32_t precompile_count, bool background)
+cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t height_offset, uint64_t height_chunk_size, uint32_t precompile_count, bool background)
{
- printer::inst()->print_msg(LDEBUG, "CryptonightR: start %llu released",height);
-
- if (background) {
- background_exec([=](){ CryptonightR_get_program(ctx, algo, height, precompile_count, false); });
- return nullptr;
- }
-
- const char* source_code_template =
- #include "amd_gpu/opencl/wolf-aes.cl"
- #include "amd_gpu/opencl/cryptonight_r.cl"
- ;
- const char include_name[] = "XMRSTAK_INCLUDE_RANDOM_MATH";
- const char* offset = strstr(source_code_template, include_name);
- if (!offset)
- {
- printer::inst()->print_msg(LDEBUG, "CryptonightR_get_program: XMRSTAK_INCLUDE_RANDOM_MATH not found in cryptonight_r.cl", algo);
- return nullptr;
- }
-
- V4_Instruction code[256];
- int code_size;
- switch (algo.Id())
- {
- case cryptonight_r_wow:
- code_size = v4_random_math_init(code, height);
- break;
- case cryptonight_r:
- code_size = v4_random_math_init(code, height);
- break;
- default:
- printer::inst()->print_msg(L0, "CryptonightR_get_program: invalid algo %d", algo);
- return nullptr;
- }
-
- std::string source_code(source_code_template, offset);
- source_code.append(get_code(code, code_size));
- source_code.append(offset + sizeof(include_name) - 1);
+ if(background)
+ {
+ background_exec([=]() { CryptonightR_get_program(ctx, algo, height_offset, height_chunk_size, precompile_count, false); });
+ return nullptr;
+ }
+
+ auto program = search_program(ctx, algo, height_offset);
+
+ if(program != nullptr)
+ return program;
+
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: create code for block %llu to %llu", height_offset, height_offset + height_chunk_size);
+
+ const char* source_code_definitions =
+#include "amd_gpu/opencl/cryptonight_r_def.rtcl"
+#include "amd_gpu/opencl/wolf-aes.cl"
+ ;
+
+ const char* source_code_template =
+#include "amd_gpu/opencl/cryptonight_r.rtcl"
+ ;
+ const char include_name[] = "XMRSTAK_INCLUDE_RANDOM_MATH";
+ const char* offset = strstr(source_code_template, include_name);
+ if(!offset)
+ {
+ printer::inst()->print_msg(LDEBUG, "CryptonightR_get_program: XMRSTAK_INCLUDE_RANDOM_MATH not found in cryptonight_r.cl", algo);
+ return nullptr;
+ }
+
+ std::string source_code(source_code_definitions);
+
+ for(uint64_t c = 0; c < height_chunk_size; ++c)
+ {
+ V4_Instruction code[256];
+ int code_size;
+ switch(algo.Id())
+ {
+ case cryptonight_r_wow:
+ code_size = v4_random_math_init(code, height_offset + c);
+ break;
+ case cryptonight_r:
+ code_size = v4_random_math_init(code, height_offset + c);
+ break;
+ default:
+ printer::inst()->print_msg(L0, "CryptonightR_get_program: invalid algo %d", algo);
+ return nullptr;
+ }
+
+ std::string kernel_code(source_code_template, offset);
+ kernel_code.append(get_code(code, code_size));
+ kernel_code.append(offset + sizeof(include_name) - 1);
+
+ std::string kernel_name = "cn1_cryptonight_r_" + std::to_string(height_offset + c);
+
+ source_code += std::regex_replace(kernel_code, std::regex("cn1_cryptonight_r"), kernel_name);
+ }
// scratchpad size for the selected mining algorithm
size_t hashMemSize = algo.Mem();
@@ -324,28 +372,12 @@ cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t
if(algo == cryptonight_gpu)
options += " -cl-fp32-correctly-rounded-divide-sqrt";
+ program = search_program(ctx, algo, height_offset);
- const char* source = source_code.c_str();
-
- {
- CryptonightR_cache_mutex.ReadLock();
-
- // Check if the cache has this program
- for (const CacheEntry& entry : CryptonightR_cache)
- {
- if ((entry.algo == algo) && (entry.height == height) && (entry.deviceIdx == ctx->deviceIdx))
- {
- printer::inst()->print_msg(LDEBUG, "CryptonightR: program for height %llu found in cache", height);
- auto result = entry.program;
- CryptonightR_cache_mutex.UnLock();
- return result;
- }
- }
- CryptonightR_cache_mutex.UnLock();
-
- }
+ if(program != nullptr)
+ return program;
- return CryptonightR_build_program(ctx, algo, height, precompile_count, source, options);
+ return CryptonightR_build_program(ctx, algo, height_offset, precompile_count, height_chunk_size, source_code, options);
}
} // namespace amd
diff --git a/xmrstak/backend/amd/OclCryptonightR_gen.hpp b/xmrstak/backend/amd/OclCryptonightR_gen.hpp
index 7dce77b85..f8772b1f5 100644
--- a/xmrstak/backend/amd/OclCryptonightR_gen.hpp
+++ b/xmrstak/backend/amd/OclCryptonightR_gen.hpp
@@ -3,8 +3,8 @@
#include "xmrstak/backend/cryptonight.hpp"
#include
-#include
#include
+#include
#if defined(__APPLE__)
#include
@@ -20,7 +20,7 @@ namespace amd
{
cl_program CryptonightR_get_program(GpuContext* ctx, const xmrstak_algo algo,
- uint64_t height, uint32_t precompile_count, bool background = false);
+ uint64_t height_offset, uint64_t height_chunk_size, uint32_t precompile_count, bool background = false);
} // namespace amd
} // namespace xmrstak
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp
index 9f3f75469..3c4384722 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.cpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp
@@ -13,45 +13,43 @@
* along with this program. If not, see .
*/
+#include "xmrstak/backend/amd/OclCryptonightR_gen.hpp"
#include "xmrstak/backend/cryptonight.hpp"
#include "xmrstak/jconf.hpp"
-#include "xmrstak/picosha2/picosha2.hpp"
+#include "xmrstak/net/msgstruct.hpp"
#include "xmrstak/params.hpp"
+#include "xmrstak/picosha2/picosha2.hpp"
#include "xmrstak/version.hpp"
-#include "xmrstak/net/msgstruct.hpp"
-#include "xmrstak/backend/amd/OclCryptonightR_gen.hpp"
+#include
+#include
+#include
+#include
+#include
#include
#include
-#include
-#include
#include
-#include
-#include
-#include
-#include
#include
+#include
#include
-#include
#include
-#include
#include
+#include
#if defined _MSC_VER
#include
#elif defined __GNUC__
-#include
#include
+#include
#endif
-
#ifdef _WIN32
#include
static inline void create_directory(std::string dirname)
{
- _mkdir(dirname.data());
+ _mkdir(dirname.data());
}
static inline void port_sleep(size_t sec)
@@ -59,8 +57,8 @@ static inline void port_sleep(size_t sec)
Sleep(sec * 1000);
}
#else
-#include
#include
+#include
static inline void create_directory(std::string dirname)
{
@@ -100,7 +98,7 @@ char* LoadTextFile(const char* filename)
flen = ftell(kernel);
fseek(kernel, 0, SEEK_SET);
- out = (char*)malloc(flen+1);
+ out = (char*)malloc(flen + 1);
size_t r = fread(out, flen, 1, kernel);
fclose(kernel);
@@ -121,7 +119,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
if((ret = clGetDeviceInfo(ctx->DeviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &MaximumWorkSize, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when querying a device's max worksize using clGetDeviceInfo.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when querying a device's max worksize using clGetDeviceInfo.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -140,16 +138,16 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
*/
MaximumWorkSize /= 8;
}
- printer::inst()->print_msg(L1,"Device %lu work size %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize);
+ printer::inst()->print_msg(L1, "Device %lu work size %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize);
if(ctx->workSize > MaximumWorkSize)
{
ctx->workSize = MaximumWorkSize;
- printer::inst()->print_msg(L1,"Device %lu work size to large, reduce to %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize);
+ printer::inst()->print_msg(L1, "Device %lu work size to large, reduce to %lu / %lu.", ctx->deviceIdx, ctx->workSize, MaximumWorkSize);
}
const std::string backendName = xmrstak::params::inst().openCLVendor;
- if( (ctx->stridedIndex == 2 || ctx->stridedIndex == 3) && (ctx->rawIntensity % ctx->workSize) != 0)
+ if((ctx->stridedIndex == 2 || ctx->stridedIndex == 3) && (ctx->rawIntensity % ctx->workSize) != 0)
{
size_t reduced_intensity = (ctx->rawIntensity / ctx->workSize) * ctx->workSize;
ctx->rawIntensity = reduced_intensity;
@@ -157,29 +155,29 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
}
#if defined(CL_VERSION_2_0) && !defined(CONF_ENFORCE_OpenCL_1_2)
- const cl_queue_properties CommandQueueProperties[] = { 0, 0, 0 };
+ const cl_queue_properties CommandQueueProperties[] = {0, 0, 0};
ctx->CommandQueues = clCreateCommandQueueWithProperties(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret);
#else
- const cl_command_queue_properties CommandQueueProperties = { 0 };
+ const cl_command_queue_properties CommandQueueProperties = {0};
ctx->CommandQueues = clCreateCommandQueue(opencl_ctx, ctx->DeviceID, CommandQueueProperties, &ret);
#endif
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateCommandQueueWithProperties.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateCommandQueueWithProperties.", err_to_str(ret));
return ERR_OCL_API;
}
if((ret = clGetDeviceInfo(ctx->DeviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx->computeUnits), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_MAX_COMPUTE_UNITS for device %u.", err_to_str(ret), (uint32_t)ctx->deviceIdx);
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_MAX_COMPUTE_UNITS for device %u.", err_to_str(ret), (uint32_t)ctx->deviceIdx);
return ERR_OCL_API;
}
ctx->InputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_ONLY, 128, NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create input buffer.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateBuffer to create input buffer.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -193,14 +191,14 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
ctx->ExtraBuffers[0] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, scratchPadSize * g_thd, NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash scratchpads buffer.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateBuffer to create hash scratchpads buffer.", err_to_str(ret));
return ERR_OCL_API;
}
ctx->ExtraBuffers[1] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, 200 * g_thd, NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create hash states buffer.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateBuffer to create hash states buffer.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -208,7 +206,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
ctx->ExtraBuffers[2] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 0 buffer.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateBuffer to create Branch 0 buffer.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -216,7 +214,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
ctx->ExtraBuffers[3] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 1 buffer.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateBuffer to create Branch 1 buffer.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -224,7 +222,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
ctx->ExtraBuffers[4] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 2 buffer.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateBuffer to create Branch 2 buffer.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -232,7 +230,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
ctx->ExtraBuffers[5] = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create Branch 3 buffer.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateBuffer to create Branch 3 buffer.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -240,21 +238,21 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
ctx->OutputBuffer = clCreateBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateBuffer to create output buffer.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateBuffer to create output buffer.", err_to_str(ret));
return ERR_OCL_API;
}
std::vector devNameVec(1024);
if((ret = clGetDeviceInfo(ctx->DeviceID, CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(ret),ctx->deviceIdx );
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(ret), ctx->deviceIdx);
return ERR_OCL_API;
}
std::vector openCLDriverVer(1024);
if((ret = clGetDeviceInfo(ctx->DeviceID, CL_DRIVER_VERSION, openCLDriverVer.size(), openCLDriverVer.data(), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DRIVER_VERSION for device %u.", err_to_str(ret),ctx->deviceIdx );
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceInfo to get CL_DRIVER_VERSION for device %u.", err_to_str(ret), ctx->deviceIdx);
return ERR_OCL_API;
}
@@ -342,11 +340,11 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
if(xmrstak::params::inst().AMDCache == false || !clBinFile.good())
{
if(xmrstak::params::inst().AMDCache)
- printer::inst()->print_msg(L1,"OpenCL device %u - Precompiled code %s not found. Compiling ...",ctx->deviceIdx, cache_file.c_str());
+ printer::inst()->print_msg(L1, "OpenCL device %u - Precompiled code %s not found. Compiling ...", ctx->deviceIdx, cache_file.c_str());
ctx->Program[miner_algo] = clCreateProgramWithSource(opencl_ctx, 1, (const char**)&source_code, NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateProgramWithSource on the OpenCL miner code", err_to_str(ret));
return ERR_OCL_API;
}
@@ -354,11 +352,11 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
if(ret != CL_SUCCESS)
{
size_t len;
- printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clBuildProgram.", err_to_str(ret));
if((ret = clGetProgramBuildInfo(ctx->Program[miner_algo], ctx->DeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clGetProgramBuildInfo for length of build log output.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -368,28 +366,27 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
if((ret = clGetProgramBuildInfo(ctx->Program[miner_algo], ctx->DeviceID, CL_PROGRAM_BUILD_LOG, len, BuildLog, NULL)) != CL_SUCCESS)
{
free(BuildLog);
- printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clGetProgramBuildInfo for build log.", err_to_str(ret));
return ERR_OCL_API;
}
printer::inst()->print_str("Build log:\n");
- std::cerr<Program[miner_algo], CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices,NULL);
-
+ clGetProgramInfo(ctx->Program[miner_algo], CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL);
std::vector devices_ids(num_devices);
- clGetProgramInfo(ctx->Program[miner_algo], CL_PROGRAM_DEVICES, sizeof(cl_device_id)* devices_ids.size(), devices_ids.data(),NULL);
+ clGetProgramInfo(ctx->Program[miner_algo], CL_PROGRAM_DEVICES, sizeof(cl_device_id) * devices_ids.size(), devices_ids.data(), NULL);
int dev_id = 0;
/* Search for the gpu within the program context.
* The id can be different to ctx->DeviceID.
*/
- for(auto & ocl_device : devices_ids)
+ for(auto& ocl_device : devices_ids)
{
if(ocl_device == ctx->DeviceID)
break;
@@ -401,17 +398,16 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
{
if((ret = clGetProgramBuildInfo(ctx->Program[miner_algo], ctx->DeviceID, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clGetProgramBuildInfo for status of build.", err_to_str(ret));
return ERR_OCL_API;
}
port_sleep(1);
- }
- while(status == CL_BUILD_IN_PROGRESS);
+ } while(status == CL_BUILD_IN_PROGRESS);
if(xmrstak::params::inst().AMDCache)
{
std::vector binary_sizes(num_devices);
- clGetProgramInfo (ctx->Program[miner_algo], CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL);
+ clGetProgramInfo(ctx->Program[miner_algo], CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * binary_sizes.size(), binary_sizes.data(), NULL);
std::vector all_programs(num_devices);
std::vector> program_storage;
@@ -419,7 +415,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
int p_id = 0;
size_t mem_size = 0;
// create memory structure to query all OpenCL program binaries
- for(auto & p : all_programs)
+ for(auto& p : all_programs)
{
program_storage.emplace_back(std::vector(binary_sizes[p_id]));
all_programs[p_id] = program_storage[p_id].data();
@@ -427,9 +423,9 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
p_id++;
}
- if((ret = clGetProgramInfo(ctx->Program[miner_algo], CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(),NULL)) != CL_SUCCESS)
+ if((ret = clGetProgramInfo(ctx->Program[miner_algo], CL_PROGRAM_BINARIES, num_devices * sizeof(char*), all_programs.data(), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clGetProgramInfo.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clGetProgramInfo.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -437,12 +433,12 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
file_stream.open(cache_file, std::ofstream::out | std::ofstream::binary);
file_stream.write(all_programs[dev_id], binary_sizes[dev_id]);
file_stream.close();
- printer::inst()->print_msg(L1, "OpenCL device %u - Precompiled code stored in file %s",ctx->deviceIdx, cache_file.c_str());
+ printer::inst()->print_msg(L1, "OpenCL device %u - Precompiled code stored in file %s", ctx->deviceIdx, cache_file.c_str());
}
}
else
{
- printer::inst()->print_msg(L1, "OpenCL device %u - Load precompiled code from file %s",ctx->deviceIdx, cache_file.c_str());
+ printer::inst()->print_msg(L1, "OpenCL device %u - Load precompiled code from file %s", ctx->deviceIdx, cache_file.c_str());
std::ostringstream ss;
ss << clBinFile.rdbuf();
std::string s = ss.str();
@@ -453,22 +449,21 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
cl_int clStatus;
ctx->Program[miner_algo] = clCreateProgramWithBinary(
opencl_ctx, 1, &ctx->DeviceID, &bin_size,
- (const unsigned char **)&data_ptr, &clStatus, &ret
- );
+ (const unsigned char**)&data_ptr, &clStatus, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateProgramWithBinary. Try to delete file %s", err_to_str(ret), cache_file.c_str());
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateProgramWithBinary. Try to delete file %s", err_to_str(ret), cache_file.c_str());
return ERR_OCL_API;
}
ret = clBuildProgram(ctx->Program[miner_algo], 1, &ctx->DeviceID, NULL, NULL, NULL);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clBuildProgram. Try to delete file %s", err_to_str(ret), cache_file.c_str());
+ printer::inst()->print_msg(L1, "Error %s when calling clBuildProgram. Try to delete file %s", err_to_str(ret), cache_file.c_str());
return ERR_OCL_API;
}
}
- std::vector KernelNames = { "cn2", "Blake", "Groestl", "JH", "Skein" };
+ std::vector KernelNames = {"cn2", "Blake", "Groestl", "JH", "Skein"};
if(miner_algo == cryptonight_gpu)
{
KernelNames.insert(KernelNames.begin(), "cn1_cn_gpu");
@@ -494,7 +489,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
ctx->Kernels[miner_algo][i] = clCreateKernel(ctx->Program[miner_algo], KernelNames[i].c_str(), &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateKernel for kernel_0 %s.", err_to_str(ret), KernelNames[i].c_str());
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateKernel for kernel_0 %s.", err_to_str(ret), KernelNames[i].c_str());
return ERR_OCL_API;
}
}
@@ -508,30 +503,28 @@ const cl_platform_info attributeTypes[5] = {
CL_PLATFORM_VENDOR,
CL_PLATFORM_VERSION,
CL_PLATFORM_PROFILE,
- CL_PLATFORM_EXTENSIONS
-};
+ CL_PLATFORM_EXTENSIONS};
const char* const attributeNames[] = {
"CL_PLATFORM_NAME",
"CL_PLATFORM_VENDOR",
"CL_PLATFORM_VERSION",
"CL_PLATFORM_PROFILE",
- "CL_PLATFORM_EXTENSIONS"
-};
+ "CL_PLATFORM_EXTENSIONS"};
-#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
+#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
uint32_t getNumPlatforms()
{
cl_uint num_platforms = 0;
- cl_platform_id * platforms = NULL;
+ cl_platform_id* platforms = NULL;
cl_int clStatus;
// Get platform and device information
clStatus = clGetPlatformIDs(0, NULL, &num_platforms);
if(clStatus != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for number of platforms.", err_to_str(clStatus));
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetPlatformIDs for number of platforms.", err_to_str(clStatus));
return 0u;
}
@@ -554,29 +547,29 @@ std::vector getAMDDevices(int index)
platforms.resize(numPlatforms);
if((clStatus = clGetPlatformIDs(numPlatforms, platforms.data(), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus));
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus));
return ctxVec;
}
- if((clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices)) != CL_SUCCESS)
+ if((clStatus = clGetDeviceIDs(platforms[index], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for of devices.", err_to_str(clStatus));
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceIDs for of devices.", err_to_str(clStatus));
return ctxVec;
}
device_list.resize(num_devices);
- if((clStatus = clGetDeviceIDs( platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list.data(), NULL)) != CL_SUCCESS)
+ if((clStatus = clGetDeviceIDs(platforms[index], CL_DEVICE_TYPE_GPU, num_devices, device_list.data(), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceIDs for device information.", err_to_str(clStatus));
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceIDs for device information.", err_to_str(clStatus));
return ctxVec;
}
- for (size_t k = 0; k < num_devices; k++)
+ for(size_t k = 0; k < num_devices; k++)
{
std::vector devVendorVec(1024);
if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_VENDOR, devVendorVec.size(), devVendorVec.data(), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get the device vendor name for device %u.", err_to_str(clStatus), k);
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceInfo to get the device vendor name for device %u.", err_to_str(clStatus), k);
continue;
}
@@ -596,19 +589,19 @@ std::vector getAMDDevices(int index)
if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &(ctx.computeUnits), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_MAX_COMPUTE_UNITS for device %u.", err_to_str(clStatus), k);
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_MAX_COMPUTE_UNITS for device %u.", err_to_str(clStatus), k);
continue;
}
if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &(ctx.maxMemPerAlloc), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_MAX_MEM_ALLOC_SIZE for device %u.", err_to_str(clStatus), k);
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_MAX_MEM_ALLOC_SIZE for device %u.", err_to_str(clStatus), k);
continue;
}
if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &(ctx.freeMem), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_GLOBAL_MEM_SIZE for device %u.", err_to_str(clStatus), k);
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_GLOBAL_MEM_SIZE for device %u.", err_to_str(clStatus), k);
continue;
}
@@ -618,14 +611,14 @@ std::vector getAMDDevices(int index)
if((clStatus = clGetDeviceInfo(device_list[k], CL_DEVICE_NAME, devNameVec.size(), devNameVec.data(), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k);
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceInfo to get CL_DEVICE_NAME for device %u.", err_to_str(clStatus), k);
continue;
}
std::vector openCLDriverVer(1024);
if((clStatus = clGetDeviceInfo(device_list[k], CL_DRIVER_VERSION, openCLDriverVer.size(), openCLDriverVer.data(), NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetDeviceInfo to get CL_DRIVER_VERSION for device %u.", err_to_str(clStatus), k);
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetDeviceInfo to get CL_DRIVER_VERSION for device %u.", err_to_str(clStatus), k);
continue;
}
@@ -636,7 +629,7 @@ std::vector getAMDDevices(int index)
ctx.name = std::string(devNameVec.data());
ctx.DeviceID = device_list[k];
ctx.interleave = 40;
- printer::inst()->print_msg(L0,"Found OpenCL GPU %s.",ctx.name.c_str());
+ printer::inst()->print_msg(L0, "Found OpenCL GPU %s.", ctx.name.c_str());
ctxVec.push_back(ctx);
}
}
@@ -651,13 +644,13 @@ int getAMDPlatformIdx()
if(numPlatforms == 0)
{
- printer::inst()->print_msg(L0,"WARNING: No OpenCL platform found.");
+ printer::inst()->print_msg(L0, "WARNING: No OpenCL platform found.");
return -1;
}
- cl_platform_id * platforms = NULL;
+ cl_platform_id* platforms = NULL;
cl_int clStatus;
- platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * numPlatforms);
+ platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
clStatus = clGetPlatformIDs(numPlatforms, platforms, NULL);
int platformIndex = -1;
@@ -666,7 +659,8 @@ int getAMDPlatformIdx()
if(clStatus == CL_SUCCESS)
{
- for (int i = 0; i < numPlatforms; i++) {
+ for(int i = 0; i < numPlatforms; i++)
+ {
size_t infoSize;
clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 0, NULL, &infoSize);
std::vector platformNameVec(infoSize);
@@ -675,13 +669,13 @@ int getAMDPlatformIdx()
std::string platformName(platformNameVec.data());
bool isAMDOpenCL = platformName.find("Advanced Micro Devices") != std::string::npos ||
- platformName.find("Apple") != std::string::npos ||
- platformName.find("Mesa") != std::string::npos;
+ platformName.find("Apple") != std::string::npos ||
+ platformName.find("Mesa") != std::string::npos;
bool isNVIDIADevice = platformName.find("NVIDIA Corporation") != std::string::npos || platformName.find("NVIDIA") != std::string::npos;
std::string selectedOpenCLVendor = xmrstak::params::inst().openCLVendor;
if((isAMDOpenCL && selectedOpenCLVendor == "AMD") || (isNVIDIADevice && selectedOpenCLVendor == "NVIDIA"))
{
- printer::inst()->print_msg(L0,"Found %s platform index id = %i, name = %s", selectedOpenCLVendor.c_str(), i , platformName.c_str());
+ printer::inst()->print_msg(L0, "Found %s platform index id = %i, name = %s", selectedOpenCLVendor.c_str(), i, platformName.c_str());
if(platformName.find("Mesa") != std::string::npos)
mesaPlatform = i;
else
@@ -695,12 +689,12 @@ int getAMDPlatformIdx()
// fall back to Mesa OpenCL
if(platformIndex == -1 && mesaPlatform != -1)
{
- printer::inst()->print_msg(L0,"No AMD platform found select Mesa as OpenCL platform");
+ printer::inst()->print_msg(L0, "No AMD platform found select Mesa as OpenCL platform");
platformIndex = mesaPlatform;
}
}
else
- printer::inst()->print_msg(L1,"WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus));
+ printer::inst()->print_msg(L1, "WARNING: %s when calling clGetPlatformIDs for platform information.", err_to_str(clStatus));
free(platforms);
return platformIndex;
@@ -716,15 +710,14 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
if((ret = clGetPlatformIDs(0, NULL, &entries)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clGetPlatformIDs for number of platforms.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clGetPlatformIDs for number of platforms.", err_to_str(ret));
return ERR_OCL_API;
}
-
// The number of platforms naturally is the index of the last platform plus one.
if(entries <= platform_idx)
{
- printer::inst()->print_msg(L1,"Selected OpenCL platform index %d doesn't exist.", platform_idx);
+ printer::inst()->print_msg(L1, "Selected OpenCL platform index %d doesn't exist.", platform_idx);
return ERR_STUPID_PARAMS;
}
@@ -736,7 +729,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
#endif
if((ret = clGetPlatformIDs(entries, PlatformIDList, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clGetPlatformIDs for platform ID information.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clGetPlatformIDs for platform ID information.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -747,12 +740,12 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
std::string platformName(platformNameVec.data());
if(xmrstak::params::inst().openCLVendor == "AMD" && platformName.find("Advanced Micro Devices") == std::string::npos)
{
- printer::inst()->print_msg(L1,"WARNING: using non AMD device: %s", platformName.c_str());
+ printer::inst()->print_msg(L1, "WARNING: using non AMD device: %s", platformName.c_str());
}
if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, 0, NULL, &entries)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for number of devices.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clGetDeviceIDs for number of devices.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -761,7 +754,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
{
if(ctx[i].deviceIdx >= entries)
{
- printer::inst()->print_msg(L1,"Selected OpenCL device index %lu doesn't exist.\n", ctx[i].deviceIdx);
+ printer::inst()->print_msg(L1, "Selected OpenCL device index %lu doesn't exist.\n", ctx[i].deviceIdx);
return ERR_STUPID_PARAMS;
}
}
@@ -773,7 +766,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
#endif
if((ret = clGetDeviceIDs(PlatformIDList[platform_idx], CL_DEVICE_TYPE_GPU, entries, DeviceIDList, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clGetDeviceIDs for device ID information.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clGetDeviceIDs for device ID information.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -790,41 +783,41 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
cl_context opencl_ctx = clCreateContext(NULL, num_gpus, TempDeviceList.data(), NULL, NULL, &ret);
if(ret != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clCreateContext.", err_to_str(ret));
return ERR_OCL_API;
}
- const char *fastIntMathV2CL =
- #include "./opencl/fast_int_math_v2.cl"
- ;
- const char *fastDivHeavyCL =
- #include "./opencl/fast_div_heavy.cl"
- ;
- const char *cryptonightCL =
- #include "./opencl/cryptonight.cl"
- ;
- const char *blake256CL =
- #include "./opencl/blake256.cl"
- ;
- const char *groestl256CL =
- #include "./opencl/groestl256.cl"
- ;
- const char *jhCL =
- #include "./opencl/jh.cl"
- ;
- const char *wolfAesCL =
- #include "./opencl/wolf-aes.cl"
- ;
- const char *wolfSkeinCL =
- #include "./opencl/wolf-skein.cl"
- ;
- const char *cryptonight_gpu =
- #include "./opencl/cryptonight_gpu.cl"
- ;
+ const char* fastIntMathV2CL =
+#include "./opencl/fast_int_math_v2.cl"
+ ;
+ const char* fastDivHeavyCL =
+#include "./opencl/fast_div_heavy.cl"
+ ;
+ const char* cryptonightCL =
+#include "./opencl/cryptonight.cl"
+ ;
+ const char* blake256CL =
+#include "./opencl/blake256.cl"
+ ;
+ const char* groestl256CL =
+#include "./opencl/groestl256.cl"
+ ;
+ const char* jhCL =
+#include "./opencl/jh.cl"
+ ;
+ const char* wolfAesCL =
+#include "./opencl/wolf-aes.cl"
+ ;
+ const char* wolfSkeinCL =
+#include "./opencl/wolf-skein.cl"
+ ;
+ const char* cryptonight_gpu =
+#include "./opencl/cryptonight_gpu.cl"
+ ;
std::string source_code(cryptonightCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_FAST_INT_MATH_V2"), fastIntMathV2CL);
- source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_FAST_DIV_HEAVY"), fastDivHeavyCL);
+ source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_FAST_DIV_HEAVY"), fastDivHeavyCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_AES"), wolfAesCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_SKEIN"), wolfSkeinCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_JH"), jhCL);
@@ -840,7 +833,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
for(int i = 0; i < num_gpus; ++i)
{
- printer::inst()->print_msg(LDEBUG,"OpenCL Init device %d", ctx[i].deviceIdx);
+ printer::inst()->print_msg(LDEBUG, "OpenCL Init device %d", ctx[i].deviceIdx);
const size_t devIdx = ctx[i].deviceIdx;
if(interleaveData.size() <= devIdx)
{
@@ -850,12 +843,11 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
{
interleaveData[devIdx].reset(new InterleaveData{});
interleaveData[devIdx]->lastRunTimeStamp = get_timestamp_ms();
-
}
- ctx[i].idWorkerOnDevice=interleaveData[devIdx]->numThreadsOnGPU;
+ ctx[i].idWorkerOnDevice = interleaveData[devIdx]->numThreadsOnGPU;
++interleaveData[devIdx]->numThreadsOnGPU;
ctx[i].interleaveData = interleaveData[devIdx];
- ctx[i].interleaveData->adjustThreshold = static_cast(ctx[i].interleave)/100.0;
+ ctx[i].interleaveData->adjustThreshold = static_cast(ctx[i].interleave) / 100.0;
ctx[i].interleaveData->startAdjustThreshold = ctx[i].interleaveData->adjustThreshold;
ctx[i].opencl_ctx = opencl_ctx;
@@ -871,7 +863,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t target, const xmrstak_algo& miner_algo, uint64_t height)
{
- auto & Kernels = ctx->Kernels[miner_algo.Id()];
+ auto& Kernels = ctx->Kernels[miner_algo.Id()];
cl_int ret;
@@ -885,35 +877,35 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->InputBuffer, CL_TRUE, 0, 128, input, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to fill input buffer.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueWriteBuffer to fill input buffer.", err_to_str(ret));
return ERR_OCL_API;
}
if((ret = clSetKernelArg(Kernels[0], 0, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 0.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 0, argument 0.", err_to_str(ret));
return ERR_OCL_API;
}
// Scratchpads
if((ret = clSetKernelArg(Kernels[0], 1, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret));
return ERR_OCL_API;
}
// States
if((ret = clSetKernelArg(Kernels[0], 2, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret));
return ERR_OCL_API;
}
// Threads
if((ret = clSetKernelArg(Kernels[0], 3, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 3.", err_to_str(ret));
- return(ERR_OCL_API);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 0, argument 3.", err_to_str(ret));
+ return (ERR_OCL_API);
}
if(miner_algo == cryptonight_gpu)
@@ -922,80 +914,88 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
// Scratchpads
if((ret = clSetKernelArg(Kernels[7], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 0, argument 1.", err_to_str(ret));
return ERR_OCL_API;
}
// States
if((ret = clSetKernelArg(Kernels[7], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 0, argument 2.", err_to_str(ret));
return ERR_OCL_API;
}
}
- // CN1 Kernel
+ // CN1 Kernel
- if ((miner_algo == cryptonight_r) || (miner_algo == cryptonight_r_wow)) {
+ if((miner_algo == cryptonight_r) || (miner_algo == cryptonight_r_wow))
+ {
- uint32_t PRECOMPILATION_DEPTH = 4;
+ uint32_t PRECOMPILATION_DEPTH = 1;
+ constexpr uint64_t height_chunk_size = 25;
+ uint64_t height_offset = (height / height_chunk_size) * height_chunk_size;
- // Get new kernel
- cl_program program = xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height, PRECOMPILATION_DEPTH);
+ // Get new kernel
+ cl_program program = xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height_offset, height_chunk_size, PRECOMPILATION_DEPTH);
- if (program != ctx->ProgramCryptonightR) {
- cl_int ret;
- cl_kernel kernel = clCreateKernel(program, "cn1_cryptonight_r", &ret);
+ if(program != ctx->ProgramCryptonightR || ctx->last_block_height != height)
+ {
+ cl_int ret;
+ std::string kernel_name = "cn1_cryptonight_r_" + std::to_string(height);
+ cl_kernel kernel = clCreateKernel(program, kernel_name.c_str(), &ret);
- if (ret != CL_SUCCESS) {
- printer::inst()->print_msg(LDEBUG, "CryptonightR: clCreateKernel returned error %s", err_to_str(ret));
- }
- else
+ if(ret != CL_SUCCESS)
+ {
+ printer::inst()->print_msg(LDEBUG, "CryptonightR: clCreateKernel returned error %s", err_to_str(ret));
+ }
+ else
{
- cl_kernel old_kernel = Kernels[1];
+ cl_kernel old_kernel = Kernels[1];
if(old_kernel)
clReleaseKernel(old_kernel);
- Kernels[1] = kernel;
- }
- ctx->ProgramCryptonightR = program;
+ Kernels[1] = kernel;
+ }
+ ctx->ProgramCryptonightR = program;
+ ctx->last_block_height = height;
+ printer::inst()->print_msg(LDEBUG, "Set height %llu", height);
- // Precompile next program in background
- for (int i = 1; i <= PRECOMPILATION_DEPTH; ++i)
- xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height + i, PRECOMPILATION_DEPTH, true);
+ // Precompile next program in background
+ for(int i = 1; i <= PRECOMPILATION_DEPTH; ++i)
+ xmrstak::amd::CryptonightR_get_program(ctx, miner_algo, height_offset + i * height_chunk_size, height_chunk_size, PRECOMPILATION_DEPTH, true);
- printer::inst()->print_msg(LDEBUG, "Thread #%zu updated CryptonightR", ctx->deviceIdx);
- }
+ printer::inst()->print_msg(LDEBUG, "Thread #%zu updated CryptonightR", ctx->deviceIdx);
+ }
else
{
printer::inst()->print_msg(LDEBUG, "Thread #%zu found CryptonightR", ctx->deviceIdx);
}
- }
+ }
// Scratchpads
if((ret = clSetKernelArg(Kernels[1], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 0.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, argument 0.", err_to_str(ret));
return ERR_OCL_API;
}
// States
if((ret = clSetKernelArg(Kernels[1], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 1.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, argument 1.", err_to_str(ret));
return ERR_OCL_API;
}
// Threads
if((ret = clSetKernelArg(Kernels[1], 2, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 2.", err_to_str(ret));
- return(ERR_OCL_API);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, argument 2.", err_to_str(ret));
+ return (ERR_OCL_API);
}
if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon || miner_algo == cryptonight_ipbc || miner_algo == cryptonight_stellite || miner_algo == cryptonight_masari || miner_algo == cryptonight_bittube2)
{
// Input
- if ((ret = clSetKernelArg(Kernels[1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
+ if((ret = clSetKernelArg(Kernels[1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 1, argument 4(input buffer).", err_to_str(ret));
return ERR_OCL_API;
@@ -1006,14 +1006,14 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
// Scratchpads
if((ret = clSetKernelArg(Kernels[2], 0, sizeof(cl_mem), ctx->ExtraBuffers + 0)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 0.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 0.", err_to_str(ret));
return ERR_OCL_API;
}
// States
if((ret = clSetKernelArg(Kernels[2], 1, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 1.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 1.", err_to_str(ret));
return ERR_OCL_API;
}
@@ -1022,59 +1022,59 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
// Output
if((ret = clSetKernelArg(Kernels[2], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), 2, 2);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), 2, 2);
return ERR_OCL_API;
}
// Target
if((ret = clSetKernelArg(Kernels[2], 3, sizeof(cl_ulong), &target)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), 2, 3);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), 2, 3);
return ERR_OCL_API;
}
// Threads
if((ret = clSetKernelArg(Kernels[2], 4, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret));
- return(ERR_OCL_API);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret));
+ return (ERR_OCL_API);
}
}
else
- {
+ {
// Branch 0
if((ret = clSetKernelArg(Kernels[2], 2, sizeof(cl_mem), ctx->ExtraBuffers + 2)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 2.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 2.", err_to_str(ret));
return ERR_OCL_API;
}
// Branch 1
if((ret = clSetKernelArg(Kernels[2], 3, sizeof(cl_mem), ctx->ExtraBuffers + 3)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 3.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 3.", err_to_str(ret));
return ERR_OCL_API;
}
// Branch 2
if((ret = clSetKernelArg(Kernels[2], 4, sizeof(cl_mem), ctx->ExtraBuffers + 4)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 4.", err_to_str(ret));
return ERR_OCL_API;
}
// Branch 3
if((ret = clSetKernelArg(Kernels[2], 5, sizeof(cl_mem), ctx->ExtraBuffers + 5)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 5.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 5.", err_to_str(ret));
return ERR_OCL_API;
}
// Threads
if((ret = clSetKernelArg(Kernels[2], 6, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 6.", err_to_str(ret));
- return(ERR_OCL_API);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel 2, argument 6.", err_to_str(ret));
+ return (ERR_OCL_API);
}
for(int i = 0; i < 4; ++i)
@@ -1082,35 +1082,35 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
// States
if((ret = clSetKernelArg(Kernels[i + 3], 0, sizeof(cl_mem), ctx->ExtraBuffers + 1)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 0);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 0);
return ERR_OCL_API;
}
// Nonce buffer
if((ret = clSetKernelArg(Kernels[i + 3], 1, sizeof(cl_mem), ctx->ExtraBuffers + (i + 2))) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 1);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 1);
return ERR_OCL_API;
}
// Output
if((ret = clSetKernelArg(Kernels[i + 3], 2, sizeof(cl_mem), &ctx->OutputBuffer)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 2);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 2);
return ERR_OCL_API;
}
// Target
if((ret = clSetKernelArg(Kernels[i + 3], 3, sizeof(cl_ulong), &target)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 3);
return ERR_OCL_API;
}
if((clSetKernelArg(Kernels[i + 3], 4, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4);
- return(ERR_OCL_API);
+ printer::inst()->print_msg(L1, "Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4);
+ return (ERR_OCL_API);
}
}
}
@@ -1134,7 +1134,7 @@ uint64_t updateTimings(GpuContext* ctx, const uint64_t t)
if(ctx->interleaveData->avgKernelRuntime == 0.0 || ctx->interleaveData->avgKernelRuntime > 20000.0)
ctx->interleaveData->avgKernelRuntime = runtime;
else
- ctx->interleaveData->avgKernelRuntime = ctx->interleaveData->avgKernelRuntime * (1.0 - averagingBias) + (runtime) * averagingBias;
+ ctx->interleaveData->avgKernelRuntime = ctx->interleaveData->avgKernelRuntime * (1.0 - averagingBias) + (runtime)*averagingBias;
}
return runtime;
}
@@ -1163,7 +1163,7 @@ uint64_t interleaveAdjustDelay(GpuContext* ctx, const bool enableAutoAdjustment)
if((dt > 0) && (dt < optimalTimeOffset))
{
- delay = static_cast((optimalTimeOffset - dt));
+ delay = static_cast((optimalTimeOffset - dt));
if(enableAutoAdjustment)
{
@@ -1182,8 +1182,7 @@ uint64_t interleaveAdjustDelay(GpuContext* ctx, const bool enableAutoAdjustment)
// avoid that the auto adjustment is disable interleaving
ctx->interleaveData->adjustThreshold = std::max(
ctx->interleaveData->adjustThreshold,
- 0.001
- );
+ 0.001);
}
delay = std::max(int64_t(0), delay);
@@ -1194,13 +1193,12 @@ uint64_t interleaveAdjustDelay(GpuContext* ctx, const bool enableAutoAdjustment)
{
// do not notify the user anymore if we reach a good delay
if(delay > maxDelay)
- printer::inst()->print_msg(L1,"OpenCL Interleave %u|%u: %u/%.2lf ms - %.1lf",
+ printer::inst()->print_msg(L1, "OpenCL Interleave %u|%u: %u/%.2lf ms - %.1lf",
ctx->deviceIdx,
ctx->idWorkerOnDevice,
static_cast(delay),
avgRuntime,
- ctx->interleaveData->adjustThreshold * 100.
- );
+ ctx->interleaveData->adjustThreshold * 100.);
std::this_thread::sleep_for(std::chrono::milliseconds(delay));
}
@@ -1211,12 +1209,12 @@ uint64_t interleaveAdjustDelay(GpuContext* ctx, const bool enableAutoAdjustment)
size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner_algo)
{
- const auto & Kernels = ctx->Kernels[miner_algo.Id()];
+ const auto& Kernels = ctx->Kernels[miner_algo.Id()];
cl_int ret;
cl_uint zero = 0;
size_t BranchNonces[4];
- memset(BranchNonces,0,sizeof(size_t)*4);
+ memset(BranchNonces, 0, sizeof(size_t) * 4);
size_t g_intensity = ctx->rawIntensity;
size_t w_size = ctx->workSize;
@@ -1227,28 +1225,28 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner
// round up to next multiple of w_size
g_thd = ((g_intensity + w_size - 1u) / w_size) * w_size;
// number of global threads must be a multiple of the work group size (w_size)
- assert(g_thd%w_size == 0);
+ assert(g_thd % w_size == 0);
}
for(int i = 2; i < 6; ++i)
{
if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->ExtraBuffers[i], CL_FALSE, sizeof(cl_uint) * g_intensity, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to zero branch buffer counter %d.", err_to_str(ret), i - 2);
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueWriteBuffer to zero branch buffer counter %d.", err_to_str(ret), i - 2);
return ERR_OCL_API;
}
}
if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(cl_uint), &zero, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueWriteBuffer to fetch results.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueWriteBuffer to fetch results.", err_to_str(ret));
return ERR_OCL_API;
}
- size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { 8, 8 };
+ size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = {g_thd, 8}, lthreads[2] = {8, 8};
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0);
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0);
return ERR_OCL_API;
}
@@ -1260,7 +1258,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner
size_t intens = g_intensity * thd;
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[7], 1, 0, &intens, &thd, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 7);
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 7);
return ERR_OCL_API;
}
@@ -1269,7 +1267,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[1], 1, 0, &g_thd_cn_gpu, &w_size_cn_gpu, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
return ERR_OCL_API;
}
}
@@ -1277,25 +1275,25 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner
{
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[1], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 1);
return ERR_OCL_API;
}
}
- if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[2], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
+ size_t NonceT[2] = {0, ctx->Nonce}, gthreadsT[2] = {8, g_thd}, lthreadsT[2] = {8 , w_size};
+ if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[2], 2, NonceT, gthreadsT, lthreadsT, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 2);
- return ERR_OCL_API;
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 2);
+ return ERR_OCL_API;
}
if(miner_algo != cryptonight_gpu)
{
for(int i = 0; i < 4; ++i)
{
- size_t tmpNonce = ctx->Nonce;
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, Kernels[i + 3], 1, &tmpNonce, &g_thd, &w_size, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);
return ERR_OCL_API;
}
}
@@ -1304,11 +1302,11 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, const xmrstak_algo& miner
// this call is blocking therefore the access to the results without cl_finish is fine
if((ret = clEnqueueReadBuffer(ctx->CommandQueues, ctx->OutputBuffer, CL_TRUE, 0, sizeof(cl_uint) * 0x100, HashOutput, 0, NULL, NULL)) != CL_SUCCESS)
{
- printer::inst()->print_msg(L1,"Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
+ printer::inst()->print_msg(L1, "Error %s when calling clEnqueueReadBuffer to fetch results.", err_to_str(ret));
return ERR_OCL_API;
}
- auto & numHashValues = HashOutput[0xFF];
+ auto& numHashValues = HashOutput[0xFF];
// avoid out of memory read, we have only storage for 0xFF results
if(numHashValues > 0xFF)
numHashValues = 0xFF;
diff --git a/xmrstak/backend/amd/amd_gpu/gpu.hpp b/xmrstak/backend/amd/amd_gpu/gpu.hpp
index ae2b506db..1ba300c7a 100644
--- a/xmrstak/backend/amd/amd_gpu/gpu.hpp
+++ b/xmrstak/backend/amd/amd_gpu/gpu.hpp
@@ -1,7 +1,7 @@
#pragma once
-#include "xmrstak/misc/console.hpp"
#include "xmrstak/jconf.hpp"
+#include "xmrstak/misc/console.hpp"
#if defined(__APPLE__)
#include
@@ -9,13 +9,13 @@
#include
#endif
+#include
+#include