3.95" tft LCD for Mega 2560 not working

I have bought a new 3.95" TFT LCD screen for my Mega 2560, but when I plug it in and upload the below code, the screen goes white and doesn't do anything.

// All the mcufriend.com UNO shields have the same pinout.
// i.e. control pins A0-A4.  Data D2-D9.  microSD D10-D13.
// Touchscreens are normally A1, A2, D7, D6 but the order varies
//
// This demo should work with most Adafruit TFT libraries
// If you are not using a shield,  use a full Adafruit constructor()
// e.g. Adafruit_TFTLCD tft(LCD_CS, LCD_CD, LCD_WR, LCD_RD, LCD_RESET);

#define LCD_CS A3 // Chip Select goes to Analog 3
#define LCD_CD A2 // Command/Data goes to Analog 2
#define LCD_WR A1 // LCD Write goes to Analog 1
#define LCD_RD A0 // LCD Read goes to Analog 0
#define LCD_RESET A4 // Can alternately just connect to Arduino's reset pin

#include <SPI.h>          // f.k. for Arduino-1.5.2
#include "Adafruit_GFX.h"// Hardware-specific library
#include <MCUFRIEND_kbv.h>
MCUFRIEND_kbv tft;
//#include <Adafruit_TFTLCD.h>
//Adafruit_TFTLCD tft(LCD_CS, LCD_CD, LCD_WR, LCD_RD, LCD_RESET);

// Assign human-readable names to some common 16-bit color values:
#define	BLACK   0x0000
#define	BLUE    0x001F
#define	RED     0xF800
#define	GREEN   0x07E0
#define CYAN    0x07FF
#define MAGENTA 0xF81F
#define YELLOW  0xFFE0
#define WHITE   0xFFFF

#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif

void setup(void);
void loop(void);
unsigned long testFillScreen();
unsigned long testText();
unsigned long testLines(uint16_t color);
unsigned long testFastLines(uint16_t color1, uint16_t color2);
unsigned long testRects(uint16_t color);
unsigned long testFilledRects(uint16_t color1, uint16_t color2);
unsigned long testFilledCircles(uint8_t radius, uint16_t color);
unsigned long testCircles(uint8_t radius, uint16_t color);
unsigned long testTriangles();
unsigned long testFilledTriangles();
unsigned long testRoundRects();
unsigned long testFilledRoundRects();
void progmemPrint(const char *str);
void progmemPrintln(const char *str);

void runtests(void);

uint16_t g_identifier;

extern const uint8_t hanzi[];
// Draw one 32x32 monochrome glyph from the external `hanzi` table with its
// top-left corner at (x, y).
//   x, y  : top-left pixel of the 32x32 target window
//   index : glyph number; each glyph occupies 128 bytes of the table
// Every byte is read with pgm_read_byte() and expanded least-significant bit
// first: a set bit is pushed as RED, a clear bit as BLACK.
void showhanzi(unsigned int x, unsigned int y, unsigned char index)
{
    const uint8_t *glyph = (const uint8_t *)hanzi + index * 128;
    uint8_t isFirst = 1;    // only the very first pushColors() starts a new write

    tft.setAddrWindow(x, y, x + 31, y + 31); // set the drawing region
    for (uint8_t n = 0; n < 128; n++, glyph++) {
        const uint8_t bits = pgm_read_byte(glyph);
        for (uint8_t bit = 0; bit < 8; bit++) {
            uint16_t color = (bits & (1 << bit)) ? RED : BLACK;
            tft.pushColors(&color, 1, isFirst);
            isFirst = 0;
        }
    }
}

// One-time initialisation: open the serial port, report how long it took,
// read the controller ID from the display and start the TFT driver with it.
void setup(void) {
    Serial.begin(9600);
    const uint32_t startedAt = millis();
    // Waiting with `while (!Serial) ;` would hang a Leonardo until a serial
    // monitor is connected, so only allow it some time instead.
    if (!Serial) {
        delay(5000);
    }
    Serial.print("Serial took ");
    Serial.print(millis() - startedAt);
    Serial.println("ms to start");

    // tft.reset() (hardware reset) is intentionally not called here.
    const uint16_t reported = tft.readID();
    Serial.print("ID = 0x");
    Serial.println(reported, HEX);

    uint16_t ID = reported;
    if (ID == 0xD3D3) {
        ID = 0x9481;    // write-only shield
    }
    // To force a specific controller, assign ID here (e.g. ID = 0x9329).
    tft.begin(ID);
}

// scrollbuf holds one line of pixels for the software scroll in windowScroll().
// READGRAM(x, y, buf, w, h) copies a w x h block of display memory into buf.
#if defined(MCUFRIEND_KBV_H_)
uint16_t scrollbuf[320];    // my biggest screen is 320x480
#define READGRAM(x, y, buf, w, h)  tft.readGRAM(x, y, buf, w, h)
#else
uint16_t scrollbuf[320];    // Adafruit only does 240x320
// Adafruit can read a block by one pixel at a time.
// Fills `block` row by row (w pixels per row, h rows) starting at (x, y).
// Always returns 0; the function is declared int16_t, so it must return a
// value (flowing off the end of a non-void function is undefined behaviour).
int16_t  READGRAM(int16_t x, int16_t y, uint16_t *block, int16_t w, int16_t h)
{
    uint16_t *p;
    for (int row = 0; row < h; row++) {
        p = block + row * w;
        for (int col = 0; col < w; col++) {
            *p++ = tft.readPixel(x + col, y + row);
        }
    }
    return 0;
}
#endif

// Software scroll of the wid x ht window whose top-left corner is (x, y).
//   dx  : when non-zero, every row is read back and rewritten rotated by dx pixels
//   dy  : when non-zero, every column is read back and rewritten rotated by dy pixels
//   buf : scratch buffer; must hold at least wid (for dx) or ht (for dy) pixels
void windowScroll(int16_t x, int16_t y, int16_t wid, int16_t ht, int16_t dx, int16_t dy, uint16_t *buf)
{
    if (dx != 0) {
        for (int16_t line = 0; line < ht; line++) {
            const int16_t yy = y + line;
            READGRAM(x, yy, buf, wid, 1);
            tft.setAddrWindow(x, yy, x + wid - 1, yy);
            // Write the tail of the line first, then the head that wrapped round.
            tft.pushColors(buf + dx, wid - dx, 1);
            tft.pushColors(buf, dx, 0);
        }
    }
    if (dy != 0) {
        for (int16_t strip = 0; strip < wid; strip++) {
            const int16_t xx = x + strip;
            READGRAM(xx, y, buf, 1, ht);
            tft.setAddrWindow(xx, y, xx, y + ht - 1);
            tft.pushColors(buf + dy, ht - dy, 1);
            tft.pushColors(buf, dy, 0);
        }
    }
}

// Print `msg` in yellow on a black background, starting at x = 0, y = `row`.
void printmsg(int row, const char *msg)
{
    tft.setCursor(0, row);
    tft.setTextColor(YELLOW, BLACK);
    tft.println(msg);
}

// Main demo cycle: run the timed benchmark suite, then, for each of the four
// rotations, draw the text page and a colour-grade band, show an icon,
// exercise hardware vertical scrolling (MCUFRIEND_kbv only) and, if
// readPixel() returns what was written, a software scroll. Finishes with an
// invert-display test.
void loop(void) {
    uint8_t aspect;
    uint16_t pixel;
    const char *aspectname[] = {
        "PORTRAIT", "LANDSCAPE", "PORTRAIT_REV", "LANDSCAPE_REV"
    };
    const char *colorname[] = { "BLUE", "GREEN", "RED", "GRAY" };
    uint16_t colormask[] = { 0x001F, 0x07E0, 0xF800, 0xFFFF };
    uint16_t dx, rgb, n, wid, ht;
    // msglin is also used after the rotation loop; give it a defined value so
    // the final printmsg() is not fed garbage when the loop is skipped
    // (tft.height() <= 64).
    uint16_t msglin = 0;
    tft.setRotation(0);
    runtests();
    delay(2000);
    if (tft.height() > 64) {
        for (uint8_t cnt = 0; cnt < 4; cnt++) {
            aspect = (cnt + 0) & 3;
            tft.setRotation(aspect);
            wid = tft.width();
            ht = tft.height();
            msglin = (ht > 160) ? 200 : 112;
            testText();
            // 32-step grey ramp, masked down to one colour channel per rotation
            dx = wid / 32;
            for (n = 0; n < 32; n++) {
                rgb = n * 8;
                rgb = tft.color565(rgb, rgb, rgb);
                tft.fillRect(n * dx, 48, dx, 63, rgb & colormask[aspect]);
            }
            tft.drawRect(0, 48 + 63, wid, 1, WHITE);
            tft.setTextSize(2);
            tft.setTextColor(colormask[aspect], BLACK);
            tft.setCursor(0, 72);
            tft.print(colorname[aspect]);
            tft.setTextColor(WHITE);
            tft.println(" COLOR GRADES");
            tft.setTextColor(WHITE, BLACK);
            printmsg(184, aspectname[aspect]);
            delay(1000);
            // Write one pixel and read it back; the result selects the branch
            // taken further down (software scroll or a diagnostic message).
            tft.drawPixel(0, 0, YELLOW);
            pixel = tft.readPixel(0, 0);
            tft.setTextSize((ht > 160) ? 2 : 1); //for messages
#if defined(MCUFRIEND_KBV_H_)
#if 1
            extern const uint8_t penguin[];
            tft.setAddrWindow(wid - 40 - 40, 20 + 0, wid - 1 - 40, 20 + 39);
            tft.pushColors(penguin, 1600, 1);
#elif 1
            extern const uint8_t wifi_full[];
            tft.setAddrWindow(wid - 40 - 40, 20 + 0, wid - 40 - 40 + 31, 20 + 31);
            tft.pushColors(wifi_full, 1024, 1, true);
#elif 1
            extern const uint8_t icon_40x40[];
            tft.setAddrWindow(wid - 40 - 40, 20 + 0, wid - 1 - 40, 20 + 39);
            tft.pushColors(icon_40x40, 1600, 1);
#endif
            tft.setAddrWindow(0, 0, wid - 1, ht - 1);
            if (aspect & 1) tft.drawRect(wid - 1, 0, 1, ht, WHITE);
            else tft.drawRect(0, ht - 1, wid, 1, WHITE);
            printmsg(msglin, "VERTICAL SCROLL UP");
            uint16_t maxscroll;
            if (tft.getRotation() & 1) maxscroll = wid;
            else maxscroll = ht;
            for (uint16_t i = 1; i <= maxscroll; i++) {
                tft.vertScroll(0, maxscroll, i);
                delay(10);
            }
            delay(1000);
            printmsg(msglin, "VERTICAL SCROLL DN");
            for (uint16_t i = 1; i <= maxscroll; i++) {
                tft.vertScroll(0, maxscroll, 0 - (int16_t)i);
                delay(10);
            }
            tft.vertScroll(0, maxscroll, 0);
            printmsg(msglin, "SCROLL DISABLED   ");

            delay(1000);
            if ((aspect & 1) == 0) { //Portrait
                tft.setTextColor(BLUE, BLACK);
                printmsg(msglin, "ONLY THE COLOR BAND");
                for (uint16_t i = 1; i <= 64; i++) {
                    tft.vertScroll(48, 64, i);
                    delay(20);
                }
                delay(1000);
            }
#endif
            tft.setTextColor(YELLOW, BLACK);
            if (pixel == YELLOW) {
                printmsg(msglin, "SOFTWARE SCROLL    ");
#if 0
                // diagonal scroll of block
                for (int16_t i = 45, dx = 2, dy = 1; i > 0; i -= dx) {
                    windowScroll(24, 8, 90, 40, dx, dy, scrollbuf);
                }
#else
                // plain horizontal scroll of block
                n = (wid > 320) ? 320 : wid;   // scrollbuf holds 320 pixels
                for (int16_t i = n, dx = 4, dy = 0; i > 0; i -= dx) {
                    windowScroll(0, 200, n, 16, dx, dy, scrollbuf);
                }
#endif
            }
            else if (pixel == CYAN)
                tft.println("readPixel() reads as BGR");
            else if ((pixel & 0xF8F8) == 0xF8F8)
                tft.println("readPixel() should be 24-bit");
            else {
                tft.print("readPixel() reads 0x");
                tft.println(pixel, HEX);
            }
            delay(5000);
        }
    }
    printmsg(msglin, "INVERT DISPLAY ");
    tft.invertDisplay(true);
    delay(2000);
    tft.invertDisplay(false);
}

// One benchmark record filled in by RUNTEST and printed by runtests().
struct TEST {
    PGM_P msg;      // label, assigned from PSTR() in RUNTEST
    uint32_t ms;    // value returned by the test function (micros() based)
};
TEST result[12];

// Run `test`, store its label and result in slot n, then pause half a second.
#define RUNTEST(n, str, test)           \
    {                                   \
        result[n].msg = PSTR(str);      \
        result[n].ms = test;            \
        delay(500);                     \
    }

// Run the twelve timed drawing tests, then print a summary page: a banner,
// one line per test (label and elapsed value), the total in seconds, the
// controller ID and the CPU clock. The ID is also stored in g_identifier.
void runtests(void)
{
    uint8_t i, len = 24;    // len = number of label characters printed per line
    uint32_t total;
    RUNTEST(0, "FillScreen               ", testFillScreen());
    RUNTEST(1, "Text                     ", testText());
    RUNTEST(2, "Lines                    ", testLines(CYAN));
    RUNTEST(3, "Horiz/Vert Lines         ", testFastLines(RED, BLUE));
    RUNTEST(4, "Rectangles (outline)     ", testRects(GREEN));
    RUNTEST(5, "Rectangles (filled)      ", testFilledRects(YELLOW, MAGENTA));
    RUNTEST(6, "Circles (filled)         ", testFilledCircles(10, MAGENTA));
    RUNTEST(7, "Circles (outline)        ", testCircles(10, WHITE));
    RUNTEST(8, "Triangles (outline)      ", testTriangles());
    RUNTEST(9, "Triangles (filled)       ", testFilledTriangles());
    RUNTEST(10, "Rounded rects (outline)  ", testRoundRects());
    RUNTEST(11, "Rounded rects (filled)   ", testFilledRoundRects());

    tft.fillScreen(BLACK);
    tft.setTextColor(GREEN);
    tft.setCursor(0, 0);
    uint16_t wid = tft.width();
    if (wid > 176) {
        tft.setTextSize(2);
#if defined(MCUFRIEND_KBV_H_)
        tft.print("MCUFRIEND ");
#if MCUFRIEND_KBV_H_ != 0
        tft.print(0.01 * MCUFRIEND_KBV_H_, 2);
#else
        tft.print("for");
#endif
        tft.println(" UNO");
#else
        tft.println("Adafruit-Style Tests");
#endif
    } else len = wid / 6 - 8;   // narrow screen: no banner, shorten the labels
    tft.setTextSize(1);
    total = 0;
    for (i = 0; i < 12; i++) {
        PGM_P str = result[i].msg;
        char c;
        if (len > 24) {
            if (i < 10) tft.print(" ");
            tft.print(i);
            tft.print(": ");
        }
        // Print at most len characters of the flash-resident label.
        uint8_t cnt = len;
        while ((c = pgm_read_byte(str++)) && cnt--) tft.print(c);
        tft.print(" ");
        tft.println(result[i].ms);
        total += result[i].ms;
    }
    tft.setTextSize(2);
    tft.print("Total:");
    tft.print(0.000001 * total);    // results are in microseconds
    tft.println("sec");
    g_identifier = tft.readID();
    tft.print("ID: 0x");
    tft.println(g_identifier, HEX); // reuse the value just read
//    tft.print("Reg(00):0x");
//    tft.println(tft.readReg(0x00), HEX);
    tft.print("F_CPU:");
    tft.print(0.000001 * F_CPU);
#if defined(__OPTIMIZE_SIZE__)
    tft.println("MHz -Os");
#else
    tft.println("MHz");
#endif

    delay(10000);
}

// Standard Adafruit tests.  will adjust to screen size

// Fill the whole screen with black, red, green, blue and black again.
// Returns the elapsed time in microseconds.
unsigned long testFillScreen() {
    const uint16_t fills[] = { BLACK, RED, GREEN, BLUE, BLACK };
    const unsigned long began = micros();
    for (uint8_t k = 0; k < sizeof(fills) / sizeof(fills[0]); k++) {
        tft.fillScreen(fills[k]);
    }
    return micros() - began;
}

// Clear the screen, then print a page of text in several sizes and colours.
// Returns the time spent printing in microseconds (the initial clear is not
// timed).
unsigned long testText() {
    const char *const smallPrint[] = {
        "my foonting turlingdromes.",
        "And hooptiously drangle me",
        "with crinkly bindlewurdles,",
        "Or I will rend thee",
        "in the gobberwarts",
        "with my blurglecruncheon,",
        "see if I don't!"
    };

    tft.fillScreen(BLACK);
    const unsigned long began = micros();
    tft.setCursor(0, 0);

    tft.setTextSize(1);
    tft.setTextColor(WHITE);
    tft.println("Hello World!");

    tft.setTextSize(2);
    tft.setTextColor(YELLOW);
    tft.println(123.45);

    tft.setTextSize(3);
    tft.setTextColor(RED);
    tft.println(0xDEADBEEF, HEX);
    tft.println();

    tft.setTextColor(GREEN);
    tft.setTextSize(5);
    tft.println("Groop");
    tft.setTextSize(2);
    tft.println("I implore thee,");
    tft.setTextSize(1);
    for (uint8_t k = 0; k < sizeof(smallPrint) / sizeof(smallPrint[0]); k++) {
        tft.println(smallPrint[k]);
    }
    return micros() - began;
}

// Draw a fan of lines from each of the four screen corners in `color`.
// The screen is cleared before each fan; only the line drawing is timed.
// Returns the total drawing time of all four fans in microseconds.
unsigned long testLines(uint16_t color) {
    unsigned long start, t;
    int           x1, y1, x2, y2,
                  w = tft.width(),
                  h = tft.height();

    tft.fillScreen(BLACK);

    // Fan from the top-left corner
    x1 = y1 = 0;
    y2    = h - 1;
    start = micros();
    for (x2 = 0; x2 < w; x2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    x2    = w - 1;
    for (y2 = 0; y2 < h; y2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    t     = micros() - start; // fillScreen doesn't count against timing

    tft.fillScreen(BLACK);

    // Fan from the top-right corner
    x1    = w - 1;
    y1    = 0;
    y2    = h - 1;
    start = micros();
    for (x2 = 0; x2 < w; x2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    x2    = 0;
    for (y2 = 0; y2 < h; y2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    t    += micros() - start;

    tft.fillScreen(BLACK);

    // Fan from the bottom-left corner
    x1    = 0;
    y1    = h - 1;
    y2    = 0;
    start = micros();
    for (x2 = 0; x2 < w; x2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    x2    = w - 1;
    for (y2 = 0; y2 < h; y2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    t    += micros() - start;

    tft.fillScreen(BLACK);

    // Fan from the bottom-right corner
    x1    = w - 1;
    y1    = h - 1;
    y2    = 0;
    start = micros();
    for (x2 = 0; x2 < w; x2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    x2    = 0;
    for (y2 = 0; y2 < h; y2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    t    += micros() - start;

    // Return the accumulated time; previously only the last fan was reported
    // and the first three were silently dropped.
    return t;
}

// Draw a grid on a black screen: horizontal lines every 5 pixels in color1,
// then vertical lines every 5 pixels in color2.
// Returns the drawing time in microseconds (the clear is not timed).
unsigned long testFastLines(uint16_t color1, uint16_t color2) {
    const int width  = tft.width();
    const int height = tft.height();

    tft.fillScreen(BLACK);
    const unsigned long began = micros();
    for (int row = 0; row < height; row += 5) {
        tft.drawFastHLine(0, row, width, color1);
    }
    for (int col = 0; col < width; col += 5) {
        tft.drawFastVLine(col, 0, height, color2);
    }
    return micros() - began;
}

// Draw concentric square outlines in `color`, centred on the screen, with the
// side growing from 2 in steps of 6 up to the shorter screen dimension.
// Returns the drawing time in microseconds (the clear is not timed).
unsigned long testRects(uint16_t color) {
    const int centerX = tft.width()  / 2;
    const int centerY = tft.height() / 2;

    tft.fillScreen(BLACK);
    const int limit = min(tft.width(), tft.height());
    const unsigned long began = micros();
    for (int side = 2; side < limit; side += 6) {
        const int half = side / 2;
        tft.drawRect(centerX - half, centerY - half, side, side, color);
    }
    return micros() - began;
}

// Draw concentric filled squares (color1) with outlines (color2), shrinking
// from the shorter screen dimension in steps of 6.
// Returns the accumulated fill time in microseconds; outlines and the initial
// clear are not timed.
unsigned long testFilledRects(uint16_t color1, uint16_t color2) {
    const int centerX = tft.width()  / 2 - 1;
    const int centerY = tft.height() / 2 - 1;
    unsigned long fillTime = 0;

    tft.fillScreen(BLACK);
    for (int side = min(tft.width(), tft.height()); side > 0; side -= 6) {
        const int half = side / 2;
        const int left = centerX - half;
        const int top  = centerY - half;

        const unsigned long began = micros();
        tft.fillRect(left, top, side, side, color1);
        fillTime += micros() - began;

        // Outlines are not included in timing results
        tft.drawRect(left, top, side, side, color2);
    }
    return fillTime;
}

// Tile a black screen with filled circles of the given radius and colour,
// spaced one diameter apart.
// Returns the drawing time in microseconds (the clear is not timed).
unsigned long testFilledCircles(uint8_t radius, uint16_t color) {
    const int width    = tft.width();
    const int height   = tft.height();
    const int diameter = radius * 2;

    tft.fillScreen(BLACK);
    const unsigned long began = micros();
    for (int cx = radius; cx < width; cx += diameter) {
        for (int cy = radius; cy < height; cy += diameter) {
            tft.fillCircle(cx, cy, radius, color);
        }
    }
    return micros() - began;
}

// Draw circle outlines of the given radius and colour on a one-diameter grid
// starting at (0, 0) and extending one radius past the screen edges.
// The screen is deliberately not cleared first, so the outlines land on top
// of whatever is already shown. Returns the drawing time in microseconds.
unsigned long testCircles(uint8_t radius, uint16_t color) {
    const int diameter = radius * 2;
    const int xLimit   = tft.width()  + radius;
    const int yLimit   = tft.height() + radius;

    const unsigned long began = micros();
    for (int cx = 0; cx < xLimit; cx += diameter) {
        for (int cy = 0; cy < yLimit; cy += diameter) {
            tft.drawCircle(cx, cy, radius, color);
        }
    }
    return micros() - began;
}

// Draw nested triangle outlines around the screen centre, growing in steps of
// 5 with a blue level that follows the size.
// Returns the drawing time in microseconds (the clear is not timed).
unsigned long testTriangles() {
    const int midX = tft.width()  / 2 - 1;
    const int midY = tft.height() / 2 - 1;

    tft.fillScreen(BLACK);
    const int limit = min(midX, midY);
    const unsigned long began = micros();
    for (int reach = 0; reach < limit; reach += 5) {
        const int baseY = midY + reach;
        tft.drawTriangle(
            midX        , midY - reach, // peak
            midX - reach, baseY,        // bottom left
            midX + reach, baseY,        // bottom right
            tft.color565(0, 0, reach));
    }
    return micros() - began;
}

// Draw nested filled triangles with outlines around the screen centre,
// shrinking in steps of 5 while the size stays above 10.
// Returns the accumulated fill time in microseconds; the outlines and the
// initial clear are not timed.
unsigned long testFilledTriangles() {
    unsigned long start, t = 0;
    int           i, cx = tft.width()  / 2 - 1,
                     cy = tft.height() / 2 - 1;

    tft.fillScreen(BLACK);
    // No timestamp is taken here: each fill is timed individually inside the
    // loop (the former pre-loop `start = micros()` was always overwritten).
    for (i = min(cx, cy); i > 10; i -= 5) {
        start = micros();
        tft.fillTriangle(cx, cy - i, cx - i, cy + i, cx + i, cy + i,
                         tft.color565(0, i, i));
        t += micros() - start;
        tft.drawTriangle(cx, cy - i, cx - i, cy + i, cx + i, cy + i,
                         tft.color565(i, i, 0));
    }

    return t;
}

// Draw concentric rounded-rectangle outlines around the screen centre, with
// the side growing from 0 in steps of 6 and the red level rising each step.
// Returns the drawing time in microseconds (the clear is not timed).
unsigned long testRoundRects() {
    const int midX = tft.width()  / 2 - 1;
    const int midY = tft.height() / 2 - 1;

    tft.fillScreen(BLACK);
    const int shortSide = min(tft.width(), tft.height());
    const unsigned long began = micros();
    const int ramp = (256 * 6) / shortSide;   // red increment per rectangle
    int level = 0;
    for (int side = 0; side < shortSide; side += 6) {
        const int half = side / 2;
        level += ramp;
        tft.drawRoundRect(midX - half, midY - half, side, side, side / 8,
                          tft.color565(level, 0, 0));
    }
    return micros() - began;
}

// Draw concentric filled rounded rectangles around the screen centre,
// shrinking from the shorter screen dimension in steps of 6 while the side
// stays above 20, with the green level falling each step.
// Returns the drawing time in microseconds (the clear is not timed).
unsigned long testFilledRoundRects() {
    const int midX = tft.width()  / 2 - 1;
    const int midY = tft.height() / 2 - 1;

    tft.fillScreen(BLACK);
    const unsigned long began = micros();
    const int shortSide = min(tft.width(), tft.height());
    const int fade = (256 * 6) / shortSide;   // green decrement per rectangle
    int level = 256;
    for (int side = shortSide; side > 20; side -= 6) {
        const int half = side / 2;
        level -= fade;
        tft.fillRoundRect(midX - half, midY - half, side, side, side / 8,
                          tft.color565(0, level, 0));
    }
    return micros() - began;
}

Serial monitor says this:

Serial took 0ms to start
ID = 0xD3D3

My screen: https://www.ebay.com.au/itm/324282797274

Why isn't it working? Can anyone help me?

Thank you!

If the product title says
" 3.95" 3.5" 1.44" TFT LCD Touch Screen Display Module for Arduino Mega 2560 Uno"

this is not a guarantee that the product will work "out of the box"

The microcontroller-world is not super-standardised like USB-devices.
You have to take care of more details than just

"does the plug fit into the socket?"

The display fits mechanically onto the Arduino Mega 2560 board.
To make it work you have to know which driver chip is used on the display board,
and then include a library that supports this driver chip.

The eBay shop does not seem to provide any information about the driver chip on the product page.
You should write a message to the seller.

take a high-resolution picture of both sides of your display. The resolution must be high enough to make the numbers on the bigger chips mounted on the PCB readable.

best regards Stefan

The four large chips are LVC245A, and the small chip is XPT2046.
I uploaded from the IDE -> File->Examples->Mcufriend_kbv->LCD_ID_readreg, Serial monitor says:

Read Registers on MCUFRIEND UNO shield
controllers either read as single 16-bit
e.g. the ID is at readReg(0)
or as a sequence of 8-bit values
in special locations (first is dummy)

reg(0x0000) 00 00	ID: ILI9320, ILI9325, ILI9335, ...
reg(0x0004) 04 04 04 04	Manufacturer ID
reg(0x0009) 09 09 09 09 09	Status Register
reg(0x000A) 0A 0A	Get Power Mode
reg(0x000C) 0C 0C	Get Pixel Format
reg(0x0061) 61 61	RDID1 HX8347-G
reg(0x0062) 62 62	RDID2 HX8347-G
reg(0x0063) 63 63	RDID3 HX8347-G
reg(0x0064) 64 64	RDID1 HX8347-A
reg(0x0065) 65 65	RDID2 HX8347-A
reg(0x0066) 66 66	RDID3 HX8347-A
reg(0x0067) 67 67	RDID Himax HX8347-A
reg(0x0070) 70 70	Panel Himax HX8347-A
reg(0x00A1) A1 A1 A1 A1 A1	RD_DDB SSD1963
reg(0x00B0) B0 B0	RGB Interface Signal Control
reg(0x00B4) B4 B4	Inversion Control
reg(0x00B6) B6 B6 B6 B6 B6	Display Control
reg(0x00B7) B7 B7	Entry Mode Set
reg(0x00BF) BF BF BF BF BF BF	ILI9481, HX8357-B
reg(0x00C0) C0 C0 C0 C0 C0 C0 C0 C0 C0	Panel Control
reg(0x00C8) C8 C8 C8 C8 C8 C8 C8 C8 C8 C8 C8 C8 C8	GAMMA
reg(0x00CC) CC CC	Panel Control
reg(0x00D0) D0 D0 D0	Power Control
reg(0x00D2) D2 D2 D2 D2 D2	NVM Read
reg(0x00D3) D3 D3 D3 D3	ILI9341, ILI9488
reg(0x00D4) D4 D4 D4 D4	Novatek ID
reg(0x00DA) DA DA	RDID1
reg(0x00DB) DB DB	RDID2
reg(0x00DC) DC DC	RDID3
reg(0x00E0) E0 E0 E0 E0 E0 E0 E0 E0 E0 E0 E0 E0 E0 E0 E0 E0	GAMMA-P
reg(0x00E1) E1 E1 E1 E1 E1 E1 E1 E1 E1 E1 E1 E1 E1 E1 E1 E1	GAMMA-N
reg(0x00EF) EF EF EF EF EF EF	ILI9327
reg(0x00F2) F2 F2 F2 F2 F2 F2 F2 F2 F2 F2 F2 F2	Adjust Control 2
reg(0x00F6) F6 F6 F6 F6	Interface Control

I then ran from the IDE -> File->Examples->Mcufriend_kbv->graphictest_kbv, Serial monitor says:

Serial took 0ms to start
ID = 0xD3D3

But when I upload from the IDE -> File->Examples->Mcufriend_kbv->drawBitmap_kbv, Serial monitor says:

ID = 0xD3D3
GFX drawBitmap() plots one mono pixel at a time
it defaults to transparent plotting
unless you specify foreground and background colours
it can plot a monochrome bitmap from Flash or SRAM

GFX drawGrayscaleBitmap() is not relevant for OLED, TFT
GFX drawRGBBitmap() plots one colour pixel at a time
from Flash or SRAM and with a monochrome transparent bitmask

Using the hardware pushColors() methods is faster
pushColors() expects uint16 array in SRAM
for any runtime generated images
but it expects uint8_t array of serialised bytes in Flash

Colour TFTs are natively big-endian
Many microcontrollers and Tools are little-endian
you can use the optional argument to select big-end

and the screen does absolutely nothing.

LVC245A is a simple tri-state line-driver chip.
XPT2046 is the touch-controller chip.

which means these are not the graphic-pixel-driver chips
post a

picture

So I did - guess what ? - googling

and found this thread

This thread looks promising. So if you follow the steps described there
and change your thread title from an unspecific "not working"
to
words that get to the point of what information you are looking for,
you will get better help.

best regards Stefan

Your topic has been moved to the dedicated display section on the forum in the hope that it gets the attention of the specialist(s).

First off. Thanks for providing the link.

Your display will work with MCUFRIEND_kbv but you have to make some edits.
From the "extras/mcufriend_how_to.txt" file

17. If you do not have a standard Uno Shield, you can add a SPECIAL to the mcufriend_special.h
    Edit mcufriend_shield.h:  #define USE_SPECIAL
    Edit mcufriend_special.h: e.g. #define USE_MEGA_16BIT_SHIELD
    If your "special" is write-only,  the library can not read the ID.  It always returns 0xD3D3

In your case you need USE_SPECIAL, USE_MEGA_8BIT_PORTC_SHIELD
Since your shield is write-only you have to tell it the correct ID e.g.
tft.begin(0x9488);

Look at the packaging. It might say ILI9488 or it might be ST7796S.

If you change R4, R5 you would need USE_SPECIAL, USE_MEGA_16BIT_SHIELD

David.

My LCD is not a Uno shield. It plugs into the double headers of a Mega 2560. Does this change anything? Secondly, where do I put:

tft.begin(0x9488);

Thirdly, from my research it looks like it is ILI9488. Do I have to change it in the code somewhere? This is my first touchscreen and the example code is pretty complicated, so I have no idea where to change things or what I'm doing.

Fourthly, I am not changing resistors at this point, just using as-is.

Thanks for your help @david_prentice !!!!! :grinning:
image

Your link in #1 shows a Red 3.95 inch Mega Shield.
Your photo in #7 shows a different 3.8 inch Mega Shield.

The #1 Shield needs USE_SPECIAL, USE_MEGA_8BIT_PORTC_SHIELD
The #7 Shield needs USE_SPECIAL, USE_MEGA_8BIT_SHIELD

You will find the tft.begin(0x9488); in the setup() function of a sketch.

If you had bought the Blue 3.5 inch Uno Shield from #1 link the MCUFRIEND_kbv library works out of the box. i.e. on the Mega as well as Uno

However your Red Mega Shield only fits on a Mega or Due.
You just have to follow the instructions in the "how_to"

David.

Yes, mine only fits on a Mega 2560, that is correct. The photo is not quite the same as the link, but the connections are the same.

This is the one I bought:

Your picture in #9 matches the link in #1. i.e. follow instructions from #6.

Now do you see why it is important to post accurate information.
When there is a photo of the actual pcb we can identify the display hardware properly.

I still have to guess about the ILI9488 controller when it is a write-only Shield.
The Uno Shields are write-read which means that I can read the actual ID from the controller chip. (which is not visible to humans)

David.

Yes I am sorry about that. The instructions in #6 are great, but I do not know how to utilize them. Where do they go in the code? Can I add those to my own code (which isn't written yet) ? How do they work?

Follow the instructions. i.e. you must edit the actual library files.
e.g. on my PC

C:\Users\David Prentice\Documents\Arduino\libraries\Mcufriend_kbv\utility\mcufriend_shield.h

C:\Users\David Prentice\Documents\Arduino\libraries\Mcufriend_kbv\utility\mcufriend_special.h

Don't worry. It is not difficult. Both edits only involve removing the // from a comment.

However you do need to edit your sketches. e.g. in setup()

    uint16_t ID = tft.readID();
    if (ID == 0xD3D3) ID = 0x9488;  // write-only Shield
    tft.begin(ID);

You will find that many of the examples have this sort of sequence. It is important that you use the correct ID. i.e. 0x9488 for your Shield.

What kind of cows do you have ?

David.

What is a cow?

An animal that makes milk and tastes very good.

(I used to be a Dairy Farmer.)

cowboydaniel

cowboydaniel

I guess your thoughts were too complex about the question. I guess you assumed it means something special about touch TFTs or Arduino libraries

Take a look at the code you posted yourself in post # 1

**tft.begin**(ID);

best regards Stefan

Oh. We have two steers. I thought it was a programming word I hadn't heard before! :rofl: :rofl: :rofl:

Background of people always amazes me :+1:

1 Like

Yeah same.

Ok... some news on this problem.
I have got it sort of working with the below code, however it doesn't ever clear the screen. Everything on the bottom half of the screen just prints on top of ALL the previous things that have been displayed. The top half of the screen works PERFECTLY!

// All the mcufriend.com UNO shields have the same pinout.
// i.e. control pins A0-A4.  Data D2-D9.  microSD D10-D13.
// Touchscreens are normally A1, A2, D7, D6 but the order varies
//
// This demo should work with most Adafruit TFT libraries
// If you are not using a shield,  use a full Adafruit constructor()
// e.g. Adafruit_TFTLCD tft(LCD_CS, LCD_CD, LCD_WR, LCD_RD, LCD_RESET);

#define LCD_CS A3 // Chip Select goes to Analog 3
#define LCD_CD A2 // Command/Data goes to Analog 2
#define LCD_WR A1 // LCD Write goes to Analog 1
#define LCD_RD A0 // LCD Read goes to Analog 0
#define LCD_RESET A4 // Can alternately just connect to Arduino's reset pin

#include <SPI.h>          // f.k. for Arduino-1.5.2
#include "Adafruit_GFX.h"// Hardware-specific library
#include <MCUFRIEND_kbv.h>
MCUFRIEND_kbv tft;
//#include <Adafruit_TFTLCD.h>
//Adafruit_TFTLCD tft(LCD_CS, LCD_CD, LCD_WR, LCD_RD, LCD_RESET);

// Assign human-readable names to some common 16-bit color values:
#define	BLACK   0x0000
#define	BLUE    0x001F
#define	RED     0xF800
#define	GREEN   0x07E0
#define CYAN    0x07FF
#define MAGENTA 0xF81F
#define YELLOW  0xFFE0
#define WHITE   0xFFFF

#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif

void setup(void);
void loop(void);
unsigned long testFillScreen();
unsigned long testText();
unsigned long testLines(uint16_t color);
unsigned long testFastLines(uint16_t color1, uint16_t color2);
unsigned long testRects(uint16_t color);
unsigned long testFilledRects(uint16_t color1, uint16_t color2);
unsigned long testFilledCircles(uint8_t radius, uint16_t color);
unsigned long testCircles(uint8_t radius, uint16_t color);
unsigned long testTriangles();
unsigned long testFilledTriangles();
unsigned long testRoundRects();
unsigned long testFilledRoundRects();
void progmemPrint(const char *str);
void progmemPrintln(const char *str);

void runtests(void);

uint16_t g_identifier;

extern const uint8_t hanzi[];
// Draw one 32x32 monochrome glyph from the external `hanzi` table with its
// top-left corner at (x, y).
//   x, y  : top-left pixel of the 32x32 target window
//   index : glyph number; each glyph occupies 128 bytes of the table
// Every byte is read with pgm_read_byte() and expanded least-significant bit
// first: a set bit is pushed as RED, a clear bit as BLACK.
void showhanzi(unsigned int x, unsigned int y, unsigned char index)
{
    const uint8_t *glyph = (const uint8_t *)hanzi + index * 128;
    uint8_t isFirst = 1;    // only the very first pushColors() starts a new write

    tft.setAddrWindow(x, y, x + 31, y + 31); // set the drawing region
    for (uint8_t n = 0; n < 128; n++, glyph++) {
        const uint8_t bits = pgm_read_byte(glyph);
        for (uint8_t bit = 0; bit < 8; bit++) {
            uint16_t color = (bits & (1 << bit)) ? RED : BLACK;
            tft.pushColors(&color, 1, isFirst);
            isFirst = 0;
        }
    }
}

// One-time initialisation: open the serial port, report how long it took,
// read the controller ID from the display and start the TFT driver.
// A write-only shield cannot be read and reports 0xD3D3; in that case the
// ILI9488 ID (0x9488) is substituted. Any other ID that is read is used as-is.
void setup(void) {
    Serial.begin(9600);
    uint32_t when = millis();
    //    while (!Serial) ;   //hangs a Leonardo until you connect a Serial
    if (!Serial) delay(5000);           //allow some time for Leonardo
    Serial.println("Serial took " + String((millis() - when)) + "ms to start");
    //    tft.reset();                 //hardware reset
    uint16_t ID = tft.readID(); //
    Serial.print("ID = 0x");
    Serial.println(ID, HEX);
    // Previously ID was remapped to 0x9481 and then ignored by a hard-coded
    // tft.begin(0x9488); remap to the intended controller and pass ID on.
    if (ID == 0xD3D3) ID = 0x9488; // write-only shield
//    ID = 0x9329;                             // force ID
    tft.begin(ID);
}

// scrollbuf holds one line of pixels for the software scroll in windowScroll().
// READGRAM(x, y, buf, w, h) copies a w x h block of display memory into buf.
#if defined(MCUFRIEND_KBV_H_)
uint16_t scrollbuf[320];    // my biggest screen is 320x480
#define READGRAM(x, y, buf, w, h)  tft.readGRAM(x, y, buf, w, h)
#else
uint16_t scrollbuf[320];    // Adafruit only does 240x320
// Adafruit can read a block by one pixel at a time.
// Fills `block` row by row (w pixels per row, h rows) starting at (x, y).
// Always returns 0; the function is declared int16_t, so it must return a
// value (flowing off the end of a non-void function is undefined behaviour).
int16_t  READGRAM(int16_t x, int16_t y, uint16_t *block, int16_t w, int16_t h)
{
    uint16_t *p;
    for (int row = 0; row < h; row++) {
        p = block + row * w;
        for (int col = 0; col < w; col++) {
            *p++ = tft.readPixel(x + col, y + row);
        }
    }
    return 0;
}
#endif

// Software scroll of the wid x ht window whose top-left corner is (x, y).
//   dx  : when non-zero, every row is read back and rewritten rotated by dx pixels
//   dy  : when non-zero, every column is read back and rewritten rotated by dy pixels
//   buf : scratch buffer; must hold at least wid (for dx) or ht (for dy) pixels
void windowScroll(int16_t x, int16_t y, int16_t wid, int16_t ht, int16_t dx, int16_t dy, uint16_t *buf)
{
    if (dx != 0) {
        for (int16_t line = 0; line < ht; line++) {
            const int16_t yy = y + line;
            READGRAM(x, yy, buf, wid, 1);
            tft.setAddrWindow(x, yy, x + wid - 1, yy);
            // Write the tail of the line first, then the head that wrapped round.
            tft.pushColors(buf + dx, wid - dx, 1);
            tft.pushColors(buf, dx, 0);
        }
    }
    if (dy != 0) {
        for (int16_t strip = 0; strip < wid; strip++) {
            const int16_t xx = x + strip;
            READGRAM(xx, y, buf, 1, ht);
            tft.setAddrWindow(xx, y, xx, y + ht - 1);
            tft.pushColors(buf + dy, ht - dy, 1);
            tft.pushColors(buf, dy, 0);
        }
    }
}

// Print `msg` in yellow on a black background, starting at x = 0, y = `row`.
void printmsg(int row, const char *msg)
{
    tft.setCursor(0, row);
    tft.setTextColor(YELLOW, BLACK);
    tft.println(msg);
}

void loop(void) {
    uint8_t aspect;
    uint16_t pixel;
    const char *aspectname[] = {
        "PORTRAIT", "LANDSCAPE", "PORTRAIT_REV", "LANDSCAPE_REV"
    };
    const char *colorname[] = { "BLUE", "GREEN", "RED", "GRAY" };
    uint16_t colormask[] = { 0x001F, 0x07E0, 0xF800, 0xFFFF };
    uint16_t dx, rgb, n, wid, ht, msglin;
    tft.setRotation(0);
    runtests();
    delay(2000);
    if (tft.height() > 64) {
        for (uint8_t cnt = 0; cnt < 4; cnt++) {
            aspect = (cnt + 0) & 3;
            tft.setRotation(aspect);
            wid = tft.width();
            ht = tft.height();
            msglin = (ht > 160) ? 200 : 112;
            testText();
            dx = wid / 32;
            for (n = 0; n < 32; n++) {
                rgb = n * 8;
                rgb = tft.color565(rgb, rgb, rgb);
                tft.fillRect(n * dx, 48, dx, 63, rgb & colormask[aspect]);
            }
            tft.drawRect(0, 48 + 63, wid, 1, WHITE);
            tft.setTextSize(2);
            tft.setTextColor(colormask[aspect], BLACK);
            tft.setCursor(0, 72);
            tft.print(colorname[aspect]);
            tft.setTextColor(WHITE);
            tft.println(" COLOR GRADES");
            tft.setTextColor(WHITE, BLACK);
            printmsg(184, aspectname[aspect]);
            delay(1000);
            tft.drawPixel(0, 0, YELLOW);
            pixel = tft.readPixel(0, 0);
            tft.setTextSize((ht > 160) ? 2 : 1); //for messages
#if defined(MCUFRIEND_KBV_H_)
#if 1
            extern const uint8_t penguin[];
            tft.setAddrWindow(wid - 40 - 40, 20 + 0, wid - 1 - 40, 20 + 39);
            tft.pushColors(penguin, 1600, 1);
#elif 1
            extern const uint8_t wifi_full[];
            tft.setAddrWindow(wid - 40 - 40, 20 + 0, wid - 40 - 40 + 31, 20 + 31);
            tft.pushColors(wifi_full, 1024, 1, true);
#elif 1
            extern const uint8_t icon_40x40[];
            tft.setAddrWindow(wid - 40 - 40, 20 + 0, wid - 1 - 40, 20 + 39);
            tft.pushColors(icon_40x40, 1600, 1);
#endif
            tft.setAddrWindow(0, 0, wid - 1, ht - 1);
            if (aspect & 1) tft.drawRect(wid - 1, 0, 1, ht, WHITE);
            else tft.drawRect(0, ht - 1, wid, 1, WHITE);
            printmsg(msglin, "VERTICAL SCROLL UP");
            uint16_t maxscroll;
            if (tft.getRotation() & 1) maxscroll = wid;
            else maxscroll = ht;
            for (uint16_t i = 1; i <= maxscroll; i++) {
                tft.vertScroll(0, maxscroll, i);
                delay(10);
            }
            delay(1000);
			printmsg(msglin, "VERTICAL SCROLL DN");
            for (uint16_t i = 1; i <= maxscroll; i++) {
                tft.vertScroll(0, maxscroll, 0 - (int16_t)i);
                delay(10);
            }
			tft.vertScroll(0, maxscroll, 0);
            printmsg(msglin, "SCROLL DISABLED   ");

            delay(1000);
            if ((aspect & 1) == 0) { //Portrait
                tft.setTextColor(BLUE, BLACK);
                printmsg(msglin, "ONLY THE COLOR BAND");
                for (uint16_t i = 1; i <= 64; i++) {
                    tft.vertScroll(48, 64, i);
                    delay(20);
                }
                delay(1000);
            }
#endif
            tft.setTextColor(YELLOW, BLACK);
            if (pixel == YELLOW) {
                printmsg(msglin, "SOFTWARE SCROLL    ");
#if 0
                // diagonal scroll of block
                for (int16_t i = 45, dx = 2, dy = 1; i > 0; i -= dx) {
                    windowScroll(24, 8, 90, 40, dx, dy, scrollbuf);
                }
#else
                // plain horizontal scroll of block
                n = (wid > 320) ? 320 : wid;
                for (int16_t i = n, dx = 4, dy = 0; i > 0; i -= dx) {
                    windowScroll(0, 200, n, 16, dx, dy, scrollbuf);
                }
#endif
            }
            else if (pixel == CYAN)
                tft.println("readPixel() reads as BGR");
            else if ((pixel & 0xF8F8) == 0xF8F8)
                tft.println("readPixel() should be 24-bit");
            else {
                tft.print("readPixel() reads 0x");
                tft.println(pixel, HEX);
            }
            delay(5000);
        }
    }
    printmsg(msglin, "INVERT DISPLAY ");
    tft.invertDisplay(true);
    delay(2000);
    tft.invertDisplay(false);
}

// One benchmark record: msg points at the label (created with PSTR() and
// read back with pgm_read_byte() in runtests()), ms holds the value returned
// by the test function.
struct TEST {
    PGM_P    msg;
    uint32_t ms;
};

// Results of the 12 benchmarks, indexed by the first RUNTEST() argument.
TEST result[12];

// Stores the label and the value of `test` in result[n], then pauses 500 ms.
#define RUNTEST(n, str, test)       \
    {                               \
        result[n].msg = PSTR(str);  \
        result[n].ms = test;        \
        delay(500);                 \
    }

void runtests(void)
{
    uint8_t i, len = 24, cnt;
    uint32_t total;
    RUNTEST(0, "FillScreen               ", testFillScreen());
    RUNTEST(1, "Text                     ", testText());
    RUNTEST(2, "Lines                    ", testLines(CYAN));
    RUNTEST(3, "Horiz/Vert Lines         ", testFastLines(RED, BLUE));
    RUNTEST(4, "Rectangles (outline)     ", testRects(GREEN));
    RUNTEST(5, "Rectangles (filled)      ", testFilledRects(YELLOW, MAGENTA));
    RUNTEST(6, "Circles (filled)         ", testFilledCircles(10, MAGENTA));
    RUNTEST(7, "Circles (outline)        ", testCircles(10, WHITE));
    RUNTEST(8, "Triangles (outline)      ", testTriangles());
    RUNTEST(9, "Triangles (filled)       ", testFilledTriangles());
    RUNTEST(10, "Rounded rects (outline)  ", testRoundRects());
    RUNTEST(11, "Rounded rects (filled)   ", testFilledRoundRects());

    tft.fillScreen(BLACK);
    tft.setTextColor(GREEN);
    tft.setCursor(0, 0);
    uint16_t wid = tft.width();
    if (wid > 176) {
        tft.setTextSize(2);
#if defined(MCUFRIEND_KBV_H_)
        tft.print("MCUFRIEND ");
#if MCUFRIEND_KBV_H_ != 0
        tft.print(0.01 * MCUFRIEND_KBV_H_, 2);
#else
        tft.print("for");
#endif
        tft.println(" UNO");
#else
        tft.println("Adafruit-Style Tests");
#endif
    } else len = wid / 6 - 8;
    tft.setTextSize(1);
    total = 0;
    for (i = 0; i < 12; i++) {
        PGM_P str = result[i].msg;
        char c;
        if (len > 24) {
            if (i < 10) tft.print(" ");
            tft.print(i);
            tft.print(": ");
        }
        uint8_t cnt = len;
        while ((c = pgm_read_byte(str++)) && cnt--) tft.print(c);
        tft.print(" ");
        tft.println(result[i].ms);
        total += result[i].ms;
    }
    tft.setTextSize(2);
    tft.print("Total:");
    tft.print(0.000001 * total);
    tft.println("sec");
    g_identifier = tft.readID();
    tft.print("ID: 0x");
    tft.println(tft.readID(), HEX);
//    tft.print("Reg(00):0x");
//    tft.println(tft.readReg(0x00), HEX);
    tft.print("F_CPU:");
    tft.print(0.000001 * F_CPU);
#if defined(__OPTIMIZE_SIZE__)
    tft.println("MHz -Os");
#else
    tft.println("MHz");
#endif

    delay(10000);
}

// Standard Adafruit tests.  will adjust to screen size

// Times five full-screen fills (black, red, green, blue, black).
// Returns the elapsed time as a micros() difference.
unsigned long testFillScreen()
{
    const unsigned long t0 = micros();

    tft.fillScreen(BLACK);
    tft.fillScreen(RED);
    tft.fillScreen(GREEN);
    tft.fillScreen(BLUE);
    tft.fillScreen(BLACK);

    const unsigned long elapsed = micros() - t0;
    return elapsed;
}

// Clears the screen, then times printing a page of text in several colours
// and sizes. The initial clear is not included in the returned micros()
// difference.
unsigned long testText()
{
    tft.fillScreen(BLACK);
    const unsigned long t0 = micros();

    tft.setCursor(0, 0);

    tft.setTextColor(WHITE);
    tft.setTextSize(1);
    tft.println("Hello World!");

    tft.setTextColor(YELLOW);
    tft.setTextSize(2);
    tft.println(123.45);

    tft.setTextColor(RED);
    tft.setTextSize(3);
    tft.println(0xDEADBEEF, HEX);
    tft.println();

    tft.setTextColor(GREEN);
    tft.setTextSize(5);
    tft.println("Groop");

    tft.setTextSize(2);
    tft.println("I implore thee,");

    tft.setTextSize(1);
    tft.println("my foonting turlingdromes.");
    tft.println("And hooptiously drangle me");
    tft.println("with crinkly bindlewurdles,");
    tft.println("Or I will rend thee");
    tft.println("in the gobberwarts");
    tft.println("with my blurglecruncheon,");
    tft.println("see if I don't!");

    return micros() - t0;
}

unsigned long testLines(uint16_t color) {
    unsigned long start, t;
    int           x1, y1, x2, y2,
                  w = tft.width(),
                  h = tft.height();

    tft.fillScreen(BLACK);

    x1 = y1 = 0;
    y2    = h - 1;
    start = micros();
    for (x2 = 0; x2 < w; x2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    x2    = w - 1;
    for (y2 = 0; y2 < h; y2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    t     = micros() - start; // fillScreen doesn't count against timing

    tft.fillScreen(BLACK);

    x1    = w - 1;
    y1    = 0;
    y2    = h - 1;
    start = micros();
    for (x2 = 0; x2 < w; x2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    x2    = 0;
    for (y2 = 0; y2 < h; y2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    t    += micros() - start;

    tft.fillScreen(BLACK);

    x1    = 0;
    y1    = h - 1;
    y2    = 0;
    start = micros();
    for (x2 = 0; x2 < w; x2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    x2    = w - 1;
    for (y2 = 0; y2 < h; y2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    t    += micros() - start;

    tft.fillScreen(BLACK);

    x1    = w - 1;
    y1    = h - 1;
    y2    = 0;
    start = micros();
    for (x2 = 0; x2 < w; x2 += 6) tft.drawLine(x1, y1, x2, y2, color);
    x2    = 0;
    for (y2 = 0; y2 < h; y2 += 6) tft.drawLine(x1, y1, x2, y2, color);

    return micros() - start;
}

// Times a grid of fast lines: a full-width horizontal line in color1 on
// every 5th row, then a full-height vertical line in color2 on every 5th
// column. Returns the elapsed micros() difference (screen clear excluded).
unsigned long testFastLines(uint16_t color1, uint16_t color2)
{
    const int width  = tft.width();
    const int height = tft.height();

    tft.fillScreen(BLACK);
    const unsigned long t0 = micros();

    for (int row = 0; row < height; row += 5) {
        tft.drawFastHLine(0, row, width, color1);
    }
    for (int column = 0; column < width; column += 5) {
        tft.drawFastVLine(column, 0, height, color2);
    }

    return micros() - t0;
}

// Times drawing square outlines centred on the screen, with side lengths
// 2, 8, 14, ... up to the smaller screen dimension. Returns the elapsed
// micros() difference (screen clear excluded).
unsigned long testRects(uint16_t color)
{
    const int centreX = tft.width()  / 2;
    const int centreY = tft.height() / 2;

    tft.fillScreen(BLACK);
    const int limit = min(tft.width(), tft.height());

    const unsigned long t0 = micros();
    for (int side = 2; side < limit; side += 6) {
        const int half = side / 2;
        tft.drawRect(centreX - half, centreY - half, side, side, color);
    }

    return micros() - t0;
}

// Draws shrinking filled squares (color1) with outlines (color2), from the
// smaller screen dimension down in steps of 6. Only the fillRect() calls are
// timed; the returned value is the sum of their micros() differences.
unsigned long testFilledRects(uint16_t color1, uint16_t color2)
{
    const int centreX = tft.width()  / 2 - 1;
    const int centreY = tft.height() / 2 - 1;
    unsigned long fillTime = 0;

    tft.fillScreen(BLACK);
    const int largest = min(tft.width(), tft.height());

    for (int side = largest; side > 0; side -= 6) {
        const int half = side / 2;

        const unsigned long t0 = micros();
        tft.fillRect(centreX - half, centreY - half, side, side, color1);
        fillTime += micros() - t0;

        // Outlines are not included in timing results
        tft.drawRect(centreX - half, centreY - half, side, side, color2);
    }

    return fillTime;
}

// Times filling a grid of circles of the given radius, spaced one diameter
// apart and starting one radius in from the top-left corner. Returns the
// elapsed micros() difference (screen clear excluded).
unsigned long testFilledCircles(uint8_t radius, uint16_t color)
{
    const int width    = tft.width();
    const int height   = tft.height();
    const int diameter = radius * 2;

    tft.fillScreen(BLACK);
    const unsigned long t0 = micros();

    for (int centreX = radius; centreX < width; centreX += diameter) {
        for (int centreY = radius; centreY < height; centreY += diameter) {
            tft.fillCircle(centreX, centreY, radius, color);
        }
    }

    return micros() - t0;
}

// Times drawing a grid of circle outlines of the given radius, spaced one
// diameter apart starting at (0, 0) and extending one radius past each
// screen edge. Returns the elapsed micros() difference.
unsigned long testCircles(uint8_t radius, uint16_t color)
{
    const int diameter = radius * 2;
    const int limitX   = tft.width()  + radius;
    const int limitY   = tft.height() + radius;

    // Screen is not cleared for this one -- this is
    // intentional and does not affect the reported time.
    const unsigned long t0 = micros();

    for (int centreX = 0; centreX < limitX; centreX += diameter) {
        for (int centreY = 0; centreY < limitY; centreY += diameter) {
            tft.drawCircle(centreX, centreY, radius, color);
        }
    }

    return micros() - t0;
}

// Times drawing growing triangle outlines around the screen centre, with
// the third color565() component taken from the current size. Returns the
// elapsed micros() difference (screen clear excluded).
unsigned long testTriangles()
{
    const int centreX = tft.width()  / 2 - 1;
    const int centreY = tft.height() / 2 - 1;

    tft.fillScreen(BLACK);
    const int limit = min(centreX, centreY);

    const unsigned long t0 = micros();
    for (int size = 0; size < limit; size += 5) {
        tft.drawTriangle(
            centreX       , centreY - size, // peak
            centreX - size, centreY + size, // bottom left
            centreX + size, centreY + size, // bottom right
            tft.color565(0, 0, size));
    }

    return micros() - t0;
}

// Draws shrinking filled triangles, each followed by an outline, around the
// screen centre. Only the fillTriangle() calls are timed; the returned value
// is the sum of their micros() differences.
unsigned long testFilledTriangles()
{
    const int centreX = tft.width()  / 2 - 1;
    const int centreY = tft.height() / 2 - 1;
    unsigned long fillTime = 0;

    tft.fillScreen(BLACK);

    for (int size = min(centreX, centreY); size > 10; size -= 5) {
        const unsigned long t0 = micros();
        tft.fillTriangle(centreX, centreY - size,
                         centreX - size, centreY + size,
                         centreX + size, centreY + size,
                         tft.color565(0, size, size));
        fillTime += micros() - t0;

        // The outline is drawn outside the timed section.
        tft.drawTriangle(centreX, centreY - size,
                         centreX - size, centreY + size,
                         centreX + size, centreY + size,
                         tft.color565(size, size, 0));
    }

    return fillTime;
}

// Times drawing growing rounded-square outlines around the screen centre.
// The first color565() component is raised by a fixed step per square.
// Returns the elapsed micros() difference (screen clear excluded).
unsigned long testRoundRects()
{
    const int centreX = tft.width()  / 2 - 1;
    const int centreY = tft.height() / 2 - 1;

    tft.fillScreen(BLACK);
    const int limit = min(tft.width(), tft.height());

    const unsigned long t0 = micros();
    int shade = 0;
    const int shadeStep = (256 * 6) / limit;
    for (int side = 0; side < limit; side += 6) {
        const int half = side / 2;
        shade += shadeStep;
        tft.drawRoundRect(centreX - half, centreY - half, side, side, side / 8,
                          tft.color565(shade, 0, 0));
    }

    return micros() - t0;
}

// Times filling shrinking rounded squares around the screen centre, from the
// smaller screen dimension down to just above 20 in steps of 6. The second
// color565() component is lowered by a fixed step per square. Returns the
// elapsed micros() difference (screen clear excluded).
unsigned long testFilledRoundRects()
{
    const int centreX = tft.width()  / 2 - 1;
    const int centreY = tft.height() / 2 - 1;

    tft.fillScreen(BLACK);

    const unsigned long t0 = micros();
    int shade = 256;
    const int shadeStep = (256 * 6) / min(tft.width(), tft.height());
    for (int side = min(tft.width(), tft.height()); side > 20; side -= 6) {
        const int half = side / 2;
        shade -= shadeStep;
        tft.fillRoundRect(centreX - half, centreY - half, side, side, side / 8,
                          tft.color565(0, shade, 0));
    }

    return micros() - t0;
}

I edited mcufriend_special.h as follows:

// only define one "USE_XXX" macro at any time
// Each USE_XXX macro enables the matching `defined(USE_XXX)` branch of the
// #if/#elif chain that follows; that branch supplies the pin and bus macros.
// NOTE(review): the sketch's header comment describes UNO-style shields
// (control pins A0-A4, data D2-D9), whereas the USE_MEGA_16BIT_SHIELD branch
// uses D38-D41/D43 for control and PORTA/PORTC for data. Confirm which pinout
// this 3.95" shield actually has before keeping this selection.

//#define USE_MEGA_8BIT_PROTOSHIELD
//#define USE_MEGA_8BIT_SHIELD      // 4.7sec Mega2560 Shield
#define USE_MEGA_16BIT_SHIELD     // 2.14sec Mega2560 Shield 
//#define USE_BLD_BST_MEGA32U4
//#define USE_BLD_BST_MEGA2560      // 12.23sec Uno Shield (17.38s C)
//#define USE_DUE_8BIT_PROTOSHIELD
//#define USE_DUE_16BIT_SHIELD        //RD on PA15 (D24) 
//#define USE_BOBCACHELOT_TEENSY
//#define USE_OPENSMART_SHIELD_PINOUT_UNO
//#define USE_OPENSMART_SHIELD_PINOUT_MEGA
//#define USE_OPENSMART_SHIELD_PINOUT_DUE //thanks Michel53
//#define USE_ELECHOUSE_DUE_16BIT_SHIELD    //Untested yet
//#define USE_MY_BLUEPILL
//#define USE_ADIGITALEU_TEENSY

// Start of the board/shield selection chain. `#if 0` is a never-taken
// placeholder so that every real configuration below can be an `#elif`.
#if 0

#elif defined(__AVR_ATxmega128A1__)   // Home made shield with Xplained
#warning Home made shield with Xplained
// Control lines RD/WR/CD/CS/RESET are bits 0-4 of VPORT0.
#define RD_PORT VPORT0  //PF0.    VPORT0=F, 1=B, 2=C, 3=D
#define RD_PIN  0
#define WR_PORT VPORT0
#define WR_PIN  1
#define CD_PORT VPORT0
#define CD_PIN  2
#define CS_PORT VPORT0
#define CS_PIN  3
#define RESET_PORT VPORT0
#define RESET_PIN  4

// VPORTs are very fast.   CBI, SBI are only one cycle.    Hence all those RD_ACTIVEs
// ILI9320 data sheet says tDDR=100ns.    We need 218ns to read REGs correctly.
// 8-bit data bus on VPORT2: .OUT drives it, .IN samples it, .DIR sets direction.
// setWriteDir() also writes PORTCFG.VPCTRLA/VPCTRLB (presumably the virtual
// port mapping -- confirm against the XMEGA datasheet).
// WR_STROBE, RD_STROBE, RD_ACTIVE*, RD_IDLE are defined outside this section.
#define write_8(x)    { VPORT2.OUT = x; }
#define read_8()      ( VPORT2.IN )
#define setWriteDir() { PORTCFG.VPCTRLA=0x15; PORTCFG.VPCTRLB=0x32; VPORT2.DIR = 0xFF; }
#define setReadDir()  { VPORT2.DIR = 0x00; }
#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first.
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; RD_ACTIVE2; RD_ACTIVE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// Control-pin helpers: clear / set bit b of p.OUT, or set bit b of p.DIR.
#define PIN_LOW(p, b)        (p).OUT &= ~(1<<(b))
#define PIN_HIGH(p, b)       (p).OUT |= (1<<(b))
#define PIN_OUTPUT(p, b)     (p).DIR |= (1<<(b))
#elif defined(__AVR_ATxmega32A4U__) || defined(__AVR_ATxmega128A4U__) // Home made shield with Batsocks module
#warning Home made shield with Batsocks module
// Control lines RD/WR/CD/CS are bits 0-3 of VPORT1; RESET is bit 0 of PORTE.
#define RD_PORT VPORT1   //PB0.   VPORT0=A, 1=B, 2=C, 3=D
#define RD_PIN  0
#define WR_PORT VPORT1
#define WR_PIN  1
#define CD_PORT VPORT1
#define CD_PIN  2
#define CS_PORT VPORT1
#define CS_PIN  3
#define RESET_PORT PORTE
#define RESET_PIN  0

// VPORTs are very fast.   CBI, SBI are only one cycle.    Hence all those RD_ACTIVEs
// ILI9320 data sheet says tDDR=100ns.    We need 218ns to read REGs correctly.
// S6D0154 data sheet says tDDR=250ns.    We need ~500ns to read REGs correctly.
// ST7789 data sheet says tRC=450ns.    We need ~167ns to read REGs correctly. (10 cycles @ 60MHz )
// ST7789 says tRC=160ns for ID and tRC=450ns for Frame Memory
// ILI9341 says tRC=160ns for ID and tRC=450ns for Frame Memory.  They are FASTER
// WRITE_DELAY / READ_DELAY are the tunable wait states inserted by write8()
// and READ_8(); writes currently use no extra delay.
#define WRITE_DELAY   { }
#define READ_DELAY    { RD_ACTIVE4; }
// 8-bit data bus on VPORT2: .OUT drives it, .IN samples it, .DIR sets direction.
#define write_8(x)    { VPORT2.OUT = x; }
#define read_8()      ( VPORT2.IN )
#define setWriteDir() { PORTCFG.VPCTRLA=0x10; PORTCFG.VPCTRLB=0x32; VPORT2.DIR = 0xFF; }
#define setReadDir()  { VPORT2.DIR = 0x00; }
#define write8(x)     { write_8(x); WRITE_DELAY; WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first.
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; READ_DELAY; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// Control-pin helpers: clear / set bit b of p.OUT, or set bit b of p.DIR.
#define PIN_LOW(p, b)        (p).OUT &= ~(1<<(b))
#define PIN_HIGH(p, b)       (p).OUT |= (1<<(b))
#define PIN_OUTPUT(p, b)     (p).DIR |= (1<<(b))

#elif defined(__AVR_ATmega2560__) && defined(USE_BLD_BST_MEGA2560)   //regular UNO shield on MEGA2560 using BLD/BST
#warning regular UNO shield on MEGA2560 using BLD/BST
// Control lines RD/WR/CD/CS/RESET are bits 0-4 of PORTF.
#define RD_PORT PORTF
#define RD_PIN  0
#define WR_PORT PORTF
#define WR_PIN  1
#define CD_PORT PORTF
#define CD_PIN  2
#define CS_PORT PORTF
#define CS_PIN  3
#define RESET_PORT PORTF
#define RESET_PIN  4

// Data-bus bits are scattered over ports E, G and H; these masks select the
// bus pins in each port (E: bits 3-5, G: bit 5, H: bits 3-6).
#define EMASK         0x38
#define GMASK         0x20
#define HMASK         0x78
// Writes one byte to the scattered data bus. Each asm statement reads a port
// register, copies individual bits of val into it with BST/BLD (via the T
// flag), and writes the register back, so other pins of the port keep their
// state. The three registers are addressed numerically (0x0102, 0x0E, 0x14);
// presumably PORTH, PORTE and PORTG, matching the PINx/DDRx registers used by
// read_8()/setWriteDir() below -- confirm against the ATmega2560 datasheet.
static __attribute((always_inline)) void write_8(uint8_t val)
{
	asm volatile("lds __tmp_reg__,0x0102" "\n\t"
	"BST %0,0" "\n\t" "BLD __tmp_reg__,5" "\n\t"
	"BST %0,1" "\n\t" "BLD __tmp_reg__,6" "\n\t"
	"BST %0,6" "\n\t" "BLD __tmp_reg__,3" "\n\t"
	"BST %0,7" "\n\t" "BLD __tmp_reg__,4" "\n\t"
	"sts 0x0102,__tmp_reg__" : : "a" (val));
	asm volatile("in __tmp_reg__,0x0E" "\n\t"
	"BST %0,2" "\n\t" "BLD __tmp_reg__,4" "\n\t"
	"BST %0,3" "\n\t" "BLD __tmp_reg__,5" "\n\t"
	"BST %0,5" "\n\t" "BLD __tmp_reg__,3" "\n\t"
	"out 0x0E,__tmp_reg__" : : "a" (val));
	asm volatile("in __tmp_reg__,0x14" "\n\t"
	"BST %0,4" "\n\t" "BLD __tmp_reg__,5" "\n\t"
	"out 0x14,__tmp_reg__" : : "a" (val));
}

// Reassembles the data byte from PINH/PINE/PING; the inverse of the bit
// placement done in write_8().
#define read_8()      ( ((PINH & (3<<5)) >> 5)\
| ((PINE & (3<<4)) >> 2)\
| ((PING & (1<<5)) >> 1)\
| ((PINE & (1<<3)) << 2)\
| ((PINH & (3<<3)) << 3)\
)
// Switch only the bus pins between output and input.
#define setWriteDir() { DDRH |=  HMASK; DDRG |=  GMASK; DDRE |=  EMASK;  }
#define setReadDir()  { DDRH &= ~HMASK; DDRG &= ~GMASK; DDRE &= ~EMASK;  }
#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first.
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
#define READ_8(dst)  { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { RD_STROBE; dst = read_8(); RD_IDLE; RD_STROBE; dst = (dst<<8) | read_8(); RD_IDLE; }

// Control-pin helpers. PIN_OUTPUT writes the register one address below the
// PORT register (presumably the matching DDRx -- confirm for this device).
#define PIN_LOW(p, b)        (p) &= ~(1<<(b))
#define PIN_HIGH(p, b)       (p) |= (1<<(b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1<<(b))

#elif defined(__AVR_ATmega2560__) && defined(USE_MEGA_16BIT_SHIELD)
#warning USE_MEGA_16BIT_SHIELD
// ATmega2560 with a 16-bit parallel bus: high byte on PORTA, low byte on
// PORTC. Control lines are on PL6 (RD), PG2 (WR), PD7 (CD), PG1 (CS) and
// PG0 (RESET).
#define USES_16BIT_BUS
#define RD_PORT PORTL
#define RD_PIN  6        //PL6 (D43).   Graham has PA15 (D24) on Due Shield 
#define WR_PORT PORTG
#define WR_PIN  2        //D39 CTE
#define CD_PORT PORTD
#define CD_PIN  7        //D38 CTE
#define CS_PORT PORTG
#define CS_PIN  1        //D40 CTE
#define RESET_PORT PORTG
#define RESET_PIN  0     //D41 CTE

// write_8 drives only the low byte (PORTC); write_16 drives both bytes.
#define write_8(x)    { PORTC = x; }
#define write_16(x)   { PORTA = (x) >> 8; PORTC = x; }

#define read_16()     ( (PINA<<8) | (PINC) )
#define setWriteDir() { DDRC = 0xFF; DDRA = 0xff; }
#define setReadDir()  { DDRC = 0x00; DDRA = 0x00; }
// WR_STROBE, RD_STROBE and RD_IDLE are defined outside this section.
#define write8(x)     { write_8(x); WR_STROBE; }
#define write16(x)    { write_16(x); WR_STROBE; }
#define READ_16(dst)  { RD_STROBE; dst = read_16(); RD_IDLE; }
// An 8-bit read is a full bus read reduced to the low (PORTC) byte. The mask
// was 0xFFFF, which kept the PORTA byte as well; 0xFF matches the other
// 16-bit bus configurations in this file.
#define READ_8(dst)   { READ_16(dst); dst &= 0xFF; }

// Control-pin helpers. PIN_OUTPUT writes the register one address below the
// PORT register (presumably the matching DDRx -- confirm for this device).
#define PIN_LOW(p, b)        (p) &= ~(1<<(b))
#define PIN_HIGH(p, b)       (p) |= (1<<(b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1<<(b))

#elif defined(__AVR_ATmega2560__) && defined(USE_MEGA_8BIT_SHIELD)
#warning USE_MEGA_8BIT_SHIELD for vagos21
// ATmega2560 with an 8-bit parallel bus on PORTA. Control lines are on
// PL6 (RD), PG2 (WR), PD7 (CD), PG1 (CS) and PG0 (RESET).
#define RD_PORT PORTL
#define RD_PIN  6        //PL6 (D43).   Graham has PA15 (D24) on Due Shield 
#define WR_PORT PORTG
#define WR_PIN  2        //D39 CTE
#define CD_PORT PORTD
#define CD_PIN  7        //D38 CTE
#define CS_PORT PORTG
#define CS_PIN  1        //D40 CTE
#define RESET_PORT PORTG
#define RESET_PIN  0     //D41 CTE

#define write_8(x)   { PORTA = x;}

#define read_8()      ( PINA )
#define setWriteDir() { DDRA = 0xFF; }
#define setReadDir()  { DDRA = 0x00; }
// WR_ACTIVE, WR_STROBE, RD_STROBE and RD_IDLE are defined outside this section.
#define write8(x)     { write_8(x); WR_ACTIVE; WR_STROBE; } // HX8357-D is slower
// 16-bit transfers are two 8-bit transfers, high byte first.
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { RD_STROBE; dst = read_8(); RD_IDLE; RD_STROBE; dst = (dst<<8) | read_8(); RD_IDLE; }

// Control-pin helpers. PIN_OUTPUT writes the register one address below the
// PORT register (presumably the matching DDRx -- confirm for this device).
#define PIN_LOW(p, b)        (p) &= ~(1<<(b))
#define PIN_HIGH(p, b)       (p) |= (1<<(b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1<<(b))

#elif defined(__AVR_ATmega2560__) && defined(USE_MEGA_8BIT_PROTOSHIELD)
#warning USE_MEGA_8BIT_PROTOSHIELD
// ATmega2560 with an 8-bit parallel bus on PORTA and the control lines
// RD/WR/CD/CS/RESET on bits 0-4 of PORTF.
#define RD_PORT PORTF
#define RD_PIN  0
#define WR_PORT PORTF
#define WR_PIN  1
#define CD_PORT PORTF
#define CD_PIN  2
#define CS_PORT PORTF
#define CS_PIN  3
#define RESET_PORT PORTF
#define RESET_PIN  4

#define write_8(x)   { PORTA = x;}

#define read_8()      ( PINA )
#define setWriteDir() { DDRA = 0xFF; }
#define setReadDir()  { DDRA = 0x00; }
// WR_STROBE, RD_STROBE and RD_IDLE are defined outside this section.
#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first.
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { RD_STROBE; dst = read_8(); RD_IDLE; RD_STROBE; dst = (dst<<8) | read_8(); RD_IDLE; }

// Control-pin helpers. PIN_OUTPUT writes the register one address below the
// PORT register (presumably the matching DDRx -- confirm for this device).
#define PIN_LOW(p, b)        (p) &= ~(1<<(b))
#define PIN_HIGH(p, b)       (p) |= (1<<(b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1<<(b))

#elif defined(__AVR_ATmega32U4__) && defined(USE_BLD_BST_MEGA32U4)  //regular UNO shield on Leonardo using BST/BLD
#warning regular UNO shield on Leonardo using BST/BLD
// Control lines are on PORTF: RD=7, WR=6, CD=5, CS=4, RESET=1.
#define RD_PORT PORTF
#define RD_PIN  7
#define WR_PORT PORTF
#define WR_PIN  6
#define CD_PORT PORTF
#define CD_PIN  5
#define CS_PORT PORTF
#define CS_PIN  4
#define RESET_PORT PORTF
#define RESET_PIN  1

// Data-bus bits are scattered over ports B, C, D and E; these masks select
// the bus pins in each port.
#define BMASK         (3<<4)
#define CMASK         (1<<6)
#define DMASK         ((1<<7)|(1<<4)|(3<<0))
#define EMASK         (1<<6)
// Writes one byte to the scattered data bus. Each asm statement reads a port
// register, copies individual bits of val into it with BST/BLD (via the T
// flag), and writes the register back, so other pins of the port keep their
// state. The registers are addressed numerically (0x05, 0x0B, 0x08, 0x0E);
// presumably PORTB, PORTD, PORTC and PORTE, matching the PINx/DDRx registers
// used by read_8()/setWriteDir() below -- confirm against the datasheet.
static __attribute((always_inline)) void write_8(uint8_t val)
{
	asm volatile("in __tmp_reg__,0x05" "\n\t"
	"BST %0,0" "\n\t" "BLD __tmp_reg__,4" "\n\t"
	"BST %0,1" "\n\t" "BLD __tmp_reg__,5" "\n\t"
	"out 0x05,__tmp_reg__" : : "a" (val));
	asm volatile("in __tmp_reg__,0x0B" "\n\t"
	"BST %0,2" "\n\t" "BLD __tmp_reg__,1" "\n\t"
	"BST %0,3" "\n\t" "BLD __tmp_reg__,0" "\n\t"
	"BST %0,4" "\n\t" "BLD __tmp_reg__,4" "\n\t"
	"BST %0,6" "\n\t" "BLD __tmp_reg__,7" "\n\t"
	"out 0x0B,__tmp_reg__" : : "a" (val));
	asm volatile("in __tmp_reg__,0x08" "\n\t"
	"BST %0,5" "\n\t" "BLD __tmp_reg__,6" "\n\t"
	"out 0x08,__tmp_reg__" : : "a" (val));
	asm volatile("in __tmp_reg__,0x0E" "\n\t"
	"BST %0,7" "\n\t" "BLD __tmp_reg__,6" "\n\t"
	"out 0x0E,__tmp_reg__" : : "a" (val));
}
// Reassembles the data byte from PINB/PIND/PINC/PINE; the inverse of the bit
// placement done in write_8().
#define read_8()      ( ((PINB & (3<<4)) >> 4)\
| ((PIND & (1<<1)) << 1)\
| ((PIND & (1<<0)) << 3)\
| ((PIND & (1<<4)) >> 0)\
| ((PINC & (1<<6)) >> 1)\
| ((PIND & (1<<7)) >> 1)\
| ((PINE & (1<<6)) << 1)\
)
// Switch only the bus pins between output and input.
#define setWriteDir() { DDRB |=  BMASK; DDRC |=  CMASK; DDRD |=  DMASK; DDRE |=  EMASK;  }
#define setReadDir()  { DDRB &= ~BMASK; DDRC &= ~CMASK; DDRD &= ~DMASK; DDRE &= ~EMASK;  }
#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first.
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
#define READ_8(dst)  { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { RD_STROBE; dst = read_8(); RD_IDLE; RD_STROBE; dst = (dst<<8) | read_8(); RD_IDLE; }

// Control-pin helpers. PIN_OUTPUT writes the register one address below the
// PORT register (presumably the matching DDRx -- confirm for this device).
#define PIN_LOW(p, b)        (p) &= ~(1<<(b))
#define PIN_HIGH(p, b)       (p) |= (1<<(b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1<<(b))

#elif defined(__SAM3X8E__) && defined(USE_DUE_8BIT_PROTOSHIELD)  //regular UNO shield on DUE
#warning USE_DUE_8BIT_PROTOSHIELD
// configure macros for the control pins
  #define RD_PORT PIOA
  #define RD_PIN  16    //A0
  #define WR_PORT PIOA
  #define WR_PIN  24    //A1
  #define CD_PORT PIOA
  #define CD_PIN  23    //A2
  #define CS_PORT PIOA
  #define CS_PIN  22    //A3
  #define RESET_PORT PIOA
  #define RESET_PIN  6  //A4
// configure macros for data bus
// The 8-bit bus is PIOD bits 0-7. A write first clears all bus bits through
// PIO_CODR and then sets the 1-bits of x through PIO_SODR.
#define DMASK         (0xFF<<0)
#define write_8(x)   {  PIOD->PIO_CODR = DMASK; PIOD->PIO_SODR = x; }

#define read_8()      ( PIOD->PIO_PDSR & DMASK)
  #define setWriteDir() { PIOD->PIO_OER = DMASK; PIOD->PIO_PER = DMASK; }
  #define setReadDir()  { PMC->PMC_PCER0 = (1 << ID_PIOD); PIOD->PIO_ODR = DMASK;}      
// The repeated WR_IDLE / RD_IDLE and the RD_ACTIVE4 presumably stretch the
// bus timing -- confirm against the macros' definitions, which are outside
// this section.
#define write8(x)     { write_8(x); WR_ACTIVE; WR_STROBE; WR_IDLE; WR_IDLE; }
// 16-bit transfers are two 8-bit transfers, high byte first.
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; RD_ACTIVE4; dst = read_8(); RD_IDLE; RD_IDLE; RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }
// Shield Control macros.
#define PIN_LOW(port, pin)    (port)->PIO_CODR = (1<<(pin))
#define PIN_HIGH(port, pin)   (port)->PIO_SODR = (1<<(pin))
#define PIN_OUTPUT(port, pin) (port)->PIO_OER = (1<<(pin))

#elif defined(__SAM3X8E__) && defined(USE_DUE_16BIT_SHIELD)  //regular CTE shield on DUE
#warning USE_DUE_16BIT_SHIELD
#define USES_16BIT_BUS
// configure macros for the control pins
#define RD_PORT PIOA
#define RD_PIN  15     //D24 Graham
#define WR_PORT PIOD
#define WR_PIN  1      //D26
#define CD_PORT PIOD
#define CD_PIN  0      //D25
#define CS_PORT PIOD
#define CS_PIN  2      //D27
#define RESET_PORT PIOD
#define RESET_PIN  3   //D28
// configure macros for data bus 
// DB0..DB7 on PIOC1..PIOC8,  DB8..DB15 on PIOC12..PIOC19
// 
// CMASKL / CMASKH select the low-byte and high-byte bus pins in PIOC.
// Writes clear the bus pins through PIO_CODR, then set the shifted 1-bits of
// x through PIO_SODR; reads shift the PIO_PDSR bits back into place.
#define CMASKH        (0xFF00<<4)
#define CMASKL        (0x00FF<<1)
#define CMASK         (CMASKH | CMASKL)
#define write_8(x)    { PIOC->PIO_CODR = CMASKL; PIOC->PIO_SODR = (((x)&0x00FF)<<1); }
#define write_16(x)   { PIOC->PIO_CODR = CMASK; \
                        PIOC->PIO_SODR = (((x)&0x00FF)<<1)|(((x)&0xFF00)<<4); }
#define read_16()     (((PIOC->PIO_PDSR & CMASKH)>>4)|((PIOC->PIO_PDSR & CMASKL)>>1) )
#define read_8()      (read_16() & 0xFF)
#define setWriteDir() { PIOC->PIO_OER = CMASK; PIOC->PIO_PER = CMASK; }
#define setReadDir()  { PMC->PMC_PCER0 = (1 << ID_PIOC); PIOC->PIO_ODR = CMASK; }
// An 8-bit write is a 16-bit write with the high byte zeroed; an 8-bit read
// is a 16-bit read masked to the low byte.
#define write8(x)     { write16(x & 0xFF); }
#define write16(x)    { write_16(x); WR_ACTIVE; WR_STROBE; WR_IDLE; WR_IDLE; }
#define READ_16(dst)  { RD_STROBE; RD_ACTIVE4; dst = read_16(); RD_IDLE; RD_IDLE; RD_IDLE; }
#define READ_8(dst)   { READ_16(dst); dst &= 0xFF; }

// Shield Control macros.
#define PIN_LOW(port, pin)    (port)->PIO_CODR = (1<<(pin))
#define PIN_HIGH(port, pin)   (port)->PIO_SODR = (1<<(pin))
#define PIN_OUTPUT(port, pin) (port)->PIO_OER = (1<<(pin))

#elif defined(__SAM3X8E__) && defined(USE_ELECHOUSE_DUE_16BIT_SHIELD)  //ELECHOUSE_DUE shield on DUE
#warning USE_ELECHOUSE_DUE_16BIT_SHIELD
#define USES_16BIT_BUS
// configure macros for the control pins
#define RD_PORT PIOA
#define RD_PIN  15     //D24 Graham
#define WR_PORT PIOA
#define WR_PIN  14     //D23
#define CD_PORT PIOB
#define CD_PIN  26     //D22
#define CS_PORT PIOA
#define CS_PIN  7      //D31
#define RESET_PORT PIOC
#define RESET_PIN  1   //D33
// configure macros for data bus 
// DB0..DB7 on PIOC2..PIOC9,  DB8..DB15 on PIOC12..PIOC19
// 
// CMASKL / CMASKH select the low-byte and high-byte bus pins in PIOC.
// Writes clear the bus pins through PIO_CODR, then set the shifted 1-bits of
// x through PIO_SODR; reads shift the PIO_PDSR bits back into place.
#define CMASKH        (0xFF00<<4)
#define CMASKL        (0x00FF<<2)
#define CMASK         (CMASKH | CMASKL)
#define write_8(x)    { PIOC->PIO_CODR = CMASKL; PIOC->PIO_SODR = (((x)&0x00FF)<<2); }
#define write_16(x)   { PIOC->PIO_CODR = CMASK; \
                        PIOC->PIO_SODR = (((x)&0x00FF)<<2)|(((x)&0xFF00)<<4); }
#define read_16()     (((PIOC->PIO_PDSR & CMASKH)>>4)|((PIOC->PIO_PDSR & CMASKL)>>2) )
#define read_8()      (read_16() & 0xFF)
#define setWriteDir() { PIOC->PIO_OER = CMASK; PIOC->PIO_PER = CMASK; }
#define setReadDir()  { PMC->PMC_PCER0 = (1 << ID_PIOC); PIOC->PIO_ODR = CMASK; }
// An 8-bit write is a 16-bit write with the high byte zeroed; an 8-bit read
// is a 16-bit read masked to the low byte.
#define write8(x)     { write16(x & 0xFF); }
#define write16(x)    { write_16(x); WR_ACTIVE; WR_STROBE; WR_IDLE; WR_IDLE; }
#define READ_16(dst)  { RD_STROBE; RD_ACTIVE4; dst = read_16(); RD_IDLE; RD_IDLE; RD_IDLE; }
#define READ_8(dst)   { READ_16(dst); dst &= 0xFF; }

// Shield Control macros.
#define PIN_LOW(port, pin)    (port)->PIO_CODR = (1<<(pin))
#define PIN_HIGH(port, pin)   (port)->PIO_SODR = (1<<(pin))
#define PIN_OUTPUT(port, pin) (port)->PIO_OER = (1<<(pin))

#elif defined(__SAM3X8E__) && defined(USE_MEGA_16BIT_SHIELD)  //regular MEGA shield on DUE
#warning USE_MEGA_16BIT_SHIELD
#define USES_16BIT_BUS
// configure macros for the control pins
#define RD_PORT PIOA
#define RD_PIN  20     //D43
#define WR_PORT PIOC
#define WR_PIN  7      //D39
#define CD_PORT PIOC
#define CD_PIN  6      //D38
#define CS_PORT PIOC
#define CS_PIN  8      //D40
#define RESET_PORT PIOC
#define RESET_PIN  9   //D41
// configure macros for data bus 
// 
// The 16 bus bits are scattered over PIOA, PIOB, PIOC and PIOD; each mask
// selects the bus pins belonging to that controller.
#define AMASK         ((1<<7)|(3<<14))          //PA7, PA14-PA15
#define BMASK         (1<<26)                   //PB26
#define CMASK         (31<<1)                   //PC1-PC5
#define DMASK         ((15<<0)|(1<<6)|(3<<9))   //PD0-PD3, PD6, PD9-PD10

// Clears every bus pin through PIO_CODR, then sets the pins for the 1-bits
// of x through PIO_SODR. Bits 0-4 of x go to PC5..PC1 in reverse order, so
// they are written one at a time.
#define write_16(x)   { PIOA->PIO_CODR = AMASK; PIOB->PIO_CODR = BMASK; PIOC->PIO_CODR = CMASK; PIOD->PIO_CODR = DMASK; \
                        PIOA->PIO_SODR = (((x)&(1<<6))<<1)|(((x)&(3<<9))<<5); \
                        PIOB->PIO_SODR = (((x)&(1<<8))<<18); \
                        PIOC->PIO_SODR = (((x)&(1<<0))<<5); \
                        PIOC->PIO_SODR = (((x)&(1<<1))<<3); \
                        PIOC->PIO_SODR = (((x)&(1<<2))<<1); \
                        PIOC->PIO_SODR = (((x)&(1<<3))>>1); \
                        PIOC->PIO_SODR = (((x)&(1<<4))>>3); \
                        PIOD->PIO_SODR = (((x)&(1<<7))<<2)|(((x)&(1<<5))<<5)|(((x)&(15<<11))>>11)|(((x)&(1<<15))>>9); \
					  }

/*
#define write_16(VL)   { PIOA->PIO_CODR = AMASK; PIOC->PIO_CODR = CMASK; PIOD->PIO_CODR = DMASK; \
		REG_PIOA_SODR=((((VL)>>8) & 0x06)<<13) | ((VL & 0x40)<<1);\
		if ((VL)&(1<<8)) REG_PIOB_SODR=(1<<26); else REG_PIOB_CODR=(1<<26);\
		REG_PIOC_SODR=((VL & 0x01)<<5) | ((VL & 0x02)<<3) | ((VL & 0x04)<<1) | ((VL & 0x08)>>1) | ((VL & 0x10)>>3);\
		REG_PIOD_SODR=((((VL)>>8) & 0x78)>>3) | ((((VL)>>8) & 0x80)>>1) | ((VL & 0x20)<<5) | ((VL & 0x80)<<2);\
}
*/
// Reassembles the 16-bit value from the four PIO_PDSR registers; each term
// is the inverse of the corresponding shift in write_16().
#define read_16()     ( 0\
                        |((PIOC->PIO_PDSR & (1<<5))>>5)\
                        |((PIOC->PIO_PDSR & (1<<4))>>3)\
                        |((PIOC->PIO_PDSR & (1<<3))>>1)\
                        |((PIOC->PIO_PDSR & (1<<2))<<1)\
                        |((PIOC->PIO_PDSR & (1<<1))<<3)\
                        |((PIOD->PIO_PDSR & (1<<10))>>5)\
                        |((PIOA->PIO_PDSR & (1<<7))>>1)\
                        |((PIOD->PIO_PDSR & (1<<9))>>2)\
                        |((PIOB->PIO_PDSR & (1<<26))>>18)\
                        |((PIOA->PIO_PDSR & (3<<14))>>5)\
                        |((PIOD->PIO_PDSR & (15<<0))<<11)\
                        |((PIOD->PIO_PDSR & (1<<6))<<9)\
                      )
#define read_8()      (read_16() & 0xFF)
#define setWriteDir() {\
                        PIOA->PIO_OER = AMASK; PIOA->PIO_PER = AMASK; \
                        PIOB->PIO_OER = BMASK; PIOB->PIO_PER = BMASK; \
                        PIOC->PIO_OER = CMASK; PIOC->PIO_PER = CMASK; \
                        PIOD->PIO_OER = DMASK; PIOD->PIO_PER = DMASK; \
                      }
#define setReadDir()  { \
                        PMC->PMC_PCER0 = (1 << ID_PIOA)|(1 << ID_PIOB)|(1 << ID_PIOC)|(1 << ID_PIOD); \
						PIOA->PIO_ODR = AMASK; \
						PIOB->PIO_ODR = BMASK; \
						PIOC->PIO_ODR = CMASK; \
						PIOD->PIO_ODR = DMASK; \
					  }
// An 8-bit write is a 16-bit write with the high byte zeroed; an 8-bit read
// is a 16-bit read masked to the low byte.
#define write8(x)     { write16(x & 0xFF); }
// ILI9486 is slower than ILI9481
#define write16(x)    { write_16(x); WR_ACTIVE2; WR_STROBE; }
#define READ_16(dst)  { RD_STROBE; RD_ACTIVE4; dst = read_16(); RD_IDLE; RD_IDLE; RD_IDLE; }
#define READ_8(dst)   { READ_16(dst); dst &= 0xFF; }

// Shield Control macros.
#define PIN_LOW(port, pin)    (port)->PIO_CODR = (1<<(pin))
#define PIN_HIGH(port, pin)   (port)->PIO_SODR = (1<<(pin))
#define PIN_OUTPUT(port, pin) (port)->PIO_OER = (1<<(pin))

#elif defined(__SAM3X8E__) && defined(USE_MEGA_8BIT_SHIELD)  //regular CTE shield on DUE
#warning USE_MEGA_8BIT_SHIELD for peloxp
// configure macros for the control pins
// Control lines: PIO controller and bit number (Arduino pin in the trailing comment)
#define RD_PORT     PIOA
#define RD_PIN      20      //D43
#define WR_PORT     PIOC
#define WR_PIN      7       //D39
#define CD_PORT     PIOC
#define CD_PIN      6       //D38
#define CS_PORT     PIOC
#define CS_PIN      8       //D40
#define RESET_PORT  PIOC
#define RESET_PIN   9       //D41

// 8-bit data bus, spread over three PIO controllers
#define AMASK       (0x03 << 14)        //PA14-PA15    D23-D24
#define BMASK       (0x01 << 26)        //PB26         D22
#define DMASK       (0x0F | 0x40)       //PD0-PD3, PD6 D25-D28,D29

// Clear all data pins, then set the ones whose data bit is 1:
//   bit0 -> PB26, bits1..2 -> PA14..15, bits3..6 -> PD0..3, bit7 -> PD6
#define write_8(x)    { \
        PIOA->PIO_CODR = AMASK; \
        PIOB->PIO_CODR = BMASK; \
        PIOD->PIO_CODR = DMASK; \
        PIOB->PIO_SODR = (((x) & 0x01) << 26); \
        PIOA->PIO_SODR = (((x) & 0x06) << 13); \
        PIOD->PIO_SODR = (((x) & 0x78) >> 3); \
        PIOD->PIO_SODR = (((x) & 0x80) >> 1); \
    }

// Inverse mapping of write_8(), sampled from the PIO_PDSR registers
#define read_8()      ( ((PIOB->PIO_PDSR & BMASK) >> 26) \
                      | ((PIOA->PIO_PDSR & AMASK) >> 13) \
                      | ((PIOD->PIO_PDSR & 0x0F) << 3) \
                      | ((PIOD->PIO_PDSR & 0x40) << 1) )

// Data pins as outputs: masks go to PIO_OER and PIO_PER of each port
#define setWriteDir() { \
        PIOA->PIO_OER = AMASK; \
        PIOA->PIO_PER = AMASK; \
        PIOB->PIO_OER = BMASK; \
        PIOB->PIO_PER = BMASK; \
        PIOD->PIO_OER = DMASK; \
        PIOD->PIO_PER = DMASK; \
    }
// Data pins as inputs: PIOA..PIOD IDs go to PMC_PCER0, masks go to PIO_ODR
#define setReadDir()  { \
        PMC->PMC_PCER0 = (1 << ID_PIOA) | (1 << ID_PIOB) | (1 << ID_PIOC) | (1 << ID_PIOD); \
        PIOA->PIO_ODR = AMASK; \
        PIOB->PIO_ODR = BMASK; \
        PIOD->PIO_ODR = DMASK; \
    }

// ILI9486 is slower than ILI9481. HX8357-D is slower
#define write8(x)     { write_8(x); WR_ACTIVE4; WR_STROBE; WR_IDLE; WR_IDLE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; RD_ACTIVE4; dst = read_8(); RD_IDLE; RD_IDLE; RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// Shield Control macros: one bit written to PIO_CODR / PIO_SODR / PIO_OER
#define PIN_LOW(port, pin)    (port)->PIO_CODR = (1 << (pin))
#define PIN_HIGH(port, pin)   (port)->PIO_SODR = (1 << (pin))
#define PIN_OUTPUT(port, pin) (port)->PIO_OER = (1 << (pin))

#elif defined(__SAM3X8E__) && defined(USE_OPENSMART_SHIELD_PINOUT_DUE)  //OPENSMART shield on DUE
#warning USE_OPENSMART_SHIELD_PINOUT on DUE
 // configure macros for the control pins
// Control lines, all on PIOA
#define RD_PORT     PIOA
#define RD_PIN      16
#define WR_PORT     PIOA
#define WR_PIN      24
#define CD_PORT     PIOA
#define CD_PIN      23
#define CS_PORT     PIOA
#define CS_PIN      22
#define RESET_PORT  PIOA
#define RESET_PIN   24  // n/a. so mimic WR_PIN

// Data-bus pins: PB27, PC21-PC24, PC26, PC29 and PD7
#define BMASK       (1 << 27)
#define CMASK       ((0x0F << 21) | (1 << 26) | (1 << 29))
#define DMASK       (1 << 7)

// Clear every data pin, then issue one PIO_SODR write per data bit:
//   bit0->PC22 bit1->PC21 bit2->PC29 bit3->PD7 bit4->PC26 bit5->PB27 bit6->PC24 bit7->PC23
#define write_8(x)  { \
        PIOB->PIO_CODR = BMASK; \
        PIOC->PIO_CODR = CMASK; \
        PIOD->PIO_CODR = DMASK; \
        PIOC->PIO_SODR = (((x) & 0x01) << 22); \
        PIOC->PIO_SODR = (((x) & 0x02) << 20); \
        PIOC->PIO_SODR = (((x) & 0x04) << 27); \
        PIOD->PIO_SODR = (((x) & 0x08) << 4); \
        PIOC->PIO_SODR = (((x) & 0x10) << 22); \
        PIOB->PIO_SODR = (((x) & 0x20) << 22); \
        PIOC->PIO_SODR = (((x) & 0x40) << 18); \
        PIOC->PIO_SODR = (((x) & 0x80) << 16); \
    }

// Inverse mapping of write_8(); one PIO_PDSR sample per data bit
#define read_8()    ( ((PIOC->PIO_PDSR >> 22) & 0x01) \
                    | ((PIOC->PIO_PDSR >> 20) & 0x02) \
                    | ((PIOC->PIO_PDSR >> 27) & 0x04) \
                    | ((PIOD->PIO_PDSR >> 4)  & 0x08) \
                    | ((PIOC->PIO_PDSR >> 22) & 0x10) \
                    | ((PIOB->PIO_PDSR >> 22) & 0x20) \
                    | ((PIOC->PIO_PDSR >> 18) & 0x40) \
                    | ((PIOC->PIO_PDSR >> 16) & 0x80) )

// Direction control: masks go to PIO_OER (write) or PIO_ODR (read);
// the read path first writes the PIOB..PIOD IDs to PMC_PCER0.
#define setWriteDir() { \
        PIOB->PIO_OER = BMASK; \
        PIOC->PIO_OER = CMASK; \
        PIOD->PIO_OER = DMASK; \
    }
#define setReadDir()  { \
        PMC->PMC_PCER0 = (1 << ID_PIOB) | (1 << ID_PIOC) | (1 << ID_PIOD); \
        PIOB->PIO_ODR = BMASK; \
        PIOC->PIO_ODR = CMASK; \
        PIOD->PIO_ODR = DMASK; \
    }

#define write8(x)     { write_8(x); WR_ACTIVE; WR_STROBE; }
//#define write8(x)     { write_8(x); WR_ACTIVE; WR_STROBE; WR_IDLE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; RD_ACTIVE; dst = read_8(); RD_IDLE; RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// Shield Control macros: one bit written to PIO_CODR / PIO_SODR / PIO_OER
#define PIN_LOW(port, pin)    (port)->PIO_CODR = (1 << (pin))
#define PIN_HIGH(port, pin)   (port)->PIO_SODR = (1 << (pin))
#define PIN_OUTPUT(port, pin) (port)->PIO_OER = (1 << (pin))

#elif defined(__MK20DX256__) && defined(USE_BOBCACHELOT_TEENSY) // special for BOBCACHEALOT_TEENSY
#warning  special for BOBCACHEALOT_TEENSY
// Control lines (Kinetis GPIO port / bit)
#define RD_PORT     GPIOD
#define RD_PIN      1
#define WR_PORT     GPIOC
#define WR_PIN      0
#define CD_PORT     GPIOB
#define CD_PIN      0
#define CS_PORT     GPIOB
#define CS_PIN      1
#define RESET_PORT  GPIOB
#define RESET_PIN   3

// Data pins: data bit 1 lives on GPIOC bit 3, every other data bit sits on
// the GPIOD bit with the same number.
#define CMASK       (1 << 3)
#define DMASK       0xFD

// Clear all data pins via PCOR, then set the required ones via PSOR
#define write_8(d)    { \
        GPIOC_PCOR = CMASK; \
        GPIOD_PCOR = DMASK; \
        GPIOC_PSOR = (((d) & 0x02) << 2); \
        GPIOD_PSOR = ((d) & DMASK); \
    }
#define read_8()      ( (GPIOD_PDIR & DMASK) | ((GPIOC_PDIR & CMASK) >> 2) )
#define setWriteDir() { GPIOC_PDDR |=  CMASK; GPIOD_PDDR |=  DMASK; }
#define setReadDir()  { GPIOC_PDDR &= ~CMASK; GPIOD_PDDR &= ~DMASK; }

#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// PASTE is kept as a separate step so that a port macro such as RD_PORT is
// expanded to GPIOx before it is glued to the register suffix.
#define PASTE(x, y)   x ## y

#define PIN_LOW(port, pin)    PASTE(port, _PCOR) = (1 << (pin))
#define PIN_HIGH(port, pin)   PASTE(port, _PSOR) = (1 << (pin))
#define PIN_OUTPUT(port, pin) PASTE(port, _PDDR) |= (1 << (pin))

#elif defined(__AVR_ATmega328P__) && defined(USE_OPENSMART_SHIELD_PINOUT_UNO)
// Control lines, all on PORTC
#define RD_PORT     PORTC
#define RD_PIN      0
#define WR_PORT     PORTC
#define WR_PIN      1
#define CD_PORT     PORTC
#define CD_PIN      2
#define CS_PORT     PORTC
#define CS_PIN      3
#define RESET_PORT  PORTC
#define RESET_PIN   1  // n/a. so mimic WR_PIN

// Data bits keep their bit number; they are only split between two ports.
#define BMASK       0x2F    // data bits 0-3 and 5 on PORTB (same value as B00101111)
#define DMASK       0xD0    // data bits 4, 6 and 7 on PORTD (same value as B11010000)

// Read-modify-write each port so that non-data pins keep their state
#define write_8(x)  { \
        PORTD = (PORTD & ~DMASK) | ((x) & DMASK); \
        PORTB = (PORTB & ~BMASK) | ((x) & BMASK); \
    } // STROBEs are defined later

#define read_8()    ( (PIND & DMASK) | (PINB & BMASK) )

#define setWriteDir() { DDRD |=  DMASK; DDRB |=  BMASK; }
#define setReadDir()  { DDRD &= ~DMASK; DDRB &= ~BMASK; }

#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// PIN_OUTPUT modifies the register located one address below the PORT register
#define PIN_LOW(p, b)        (p) &= ~(1 << (b))
#define PIN_HIGH(p, b)       (p) |= (1 << (b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1 << (b))

#elif defined(__AVR_ATmega2560__) && defined(USE_OPENSMART_SHIELD_PINOUT_MEGA)
// Control lines, all on PORTF
#define RD_PORT     PORTF
#define RD_PIN      0
#define WR_PORT     PORTF
#define WR_PIN      1
#define CD_PORT     PORTF
#define CD_PIN      2
#define CS_PORT     PORTF
#define CS_PIN      3
#define RESET_PORT  PORTF
#define RESET_PIN   1  // n/a. so mimic WR_PIN

// Data-bus pins per port (hex values equal the Arduino Bxxxxxxxx constants)
#define BMASK       0xB0      //D13, D11, D10
#define GMASK       0x20      //D4
#define HMASK       0x78      //D6, D7, D8, D9

// Mapping: bits 6,7 -> PH3,PH4; bits 0,1 -> PH5,PH6; bits 2,3,5 -> PB4,PB5,PB7; bit 4 -> PG5
#define write_8(x)  { \
        PORTH = (PORTH & ~HMASK) | (((x) & 0xC0) >> 3) | (((x) & 0x03) << 5); \
        PORTB = (PORTB & ~BMASK) | (((x) & 0x2C) << 2); \
        PORTG = (PORTG & ~GMASK) | (((x) & 0x10) << 1); \
    }
#define read_8()    ( ((PINH & 0x18) << 3) \
                    | ((PINB & BMASK) >> 2) \
                    | ((PING & GMASK) >> 1) \
                    | ((PINH & 0x60) >> 5) )
#define setWriteDir() { DDRH |=  HMASK; DDRB |=  BMASK; DDRG |=  GMASK; }
#define setReadDir()  { DDRH &= ~HMASK; DDRB &= ~BMASK; DDRG &= ~GMASK; }

#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// PIN_OUTPUT modifies the register located one address below the PORT register
#define PIN_LOW(p, b)        (p) &= ~(1 << (b))
#define PIN_HIGH(p, b)       (p) |= (1 << (b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1 << (b))

#elif defined(USE_MY_BLUEPILL) && (defined(ARDUINO_GENERIC_STM32F103C) || defined(ARDUINO_NUCLEO_F103C8))
#warning Uno Shield on MY BLUEPILL

#if defined(ARDUINO_NUCLEO_F103C8)   //regular CMSIS libraries
#define REGS(x) x
#define GPIO_INIT()   { RCC->APB2ENR |= RCC_APB2ENR_IOPAEN | RCC_APB2ENR_IOPBEN | RCC_APB2ENR_IOPCEN | RCC_APB2ENR_IOPDEN | RCC_APB2ENR_AFIOEN; \
        AFIO->MAPR |= AFIO_MAPR_SWJ_CFG_1;}
#else                                                                  //weird Maple libraries
#define REGS(x) regs->x
#endif

// Extra bus timing for this board: nothing is inserted before the write
// strobe, one RD_ACTIVE is inserted before a read is sampled.
#define WRITE_DELAY { }
#define READ_DELAY  { RD_ACTIVE; }
// GROUP_MODE replaces, in register `reg` of `port`, the bits selected by
// `mask` with the corresponding bits of `val`; all other bits are kept.
#define GROUP_MODE(port, reg, mask, val)  {port->REGS(reg) = (port->REGS(reg) & ~(mask)) | ((mask)&(val)); }
// GP_OUT / GP_INP load the selected 4-bit pin fields with 0x3 / 0x4
// (presumably push-pull output / floating input on STM32F1 - confirm against the reference manual).
#define GP_OUT(port, reg, mask)           GROUP_MODE(port, reg, mask, 0x33333333)
#define GP_INP(port, reg, mask)           GROUP_MODE(port, reg, mask, 0x44444444)
// Single-pin variants: pins 0-7 are configured in CRL, pins 8-15 in CRH,
// four configuration bits per pin.
#define PIN_OUTPUT(port, pin) {\
        if (pin < 8) {GP_OUT(port, CRL, 0xF<<((pin)<<2));} \
        else {GP_OUT(port, CRH, 0xF<<((pin&7)<<2));} \
    }
#define PIN_INPUT(port, pin) { \
        if (pin < 8) { GP_INP(port, CRL, 0xF<<((pin)<<2)); } \
        else { GP_INP(port, CRH, 0xF<<((pin&7)<<2)); } \
    }
// BSRR: writing bit `pin` drives the pin high, writing bit `pin`+16 drives it low.
#define PIN_HIGH(port, pin)   (port)-> REGS(BSRR) = (1<<(pin))
#define PIN_LOW(port, pin)    (port)-> REGS(BSRR) = (1<<((pin)+16))

// Control lines
#define RD_PORT     GPIOB
#define RD_PIN      1
#define WR_PORT     GPIOB
#define WR_PIN      0
#define CD_PORT     GPIOA
#define CD_PIN      7
#define CS_PORT     GPIOA
#define CS_PIN      6
#define RESET_PORT  GPIOA
#define RESET_PIN   5

// Data pins: PA0-PA3 and PA9-PA10 on GPIOA, PB6-PB7 on GPIOB
#define AMASK 0x060F
#define BMASK 0x00C0
// Reset all data pins through the upper BSRR half, then set the required
// ones: bits 0,1 -> PA9,PA10; bits 4..7 -> PA0..PA3; bits 2,3 -> PB6,PB7
#define write_8(d)    { \
        GPIOA->REGS(BSRR) = (AMASK << 16); \
        GPIOB->REGS(BSRR) = (BMASK << 16); \
        GPIOA->REGS(BSRR) = (((d) & 0x03) << 9) | (((d) & 0xF0) >> 4); \
        GPIOB->REGS(BSRR) = (((d) & 0x0C) << 4); \
    }
//                      PA10,PA9                          PA3-PA0                           PB7,PB6
#define read_8()      ( ((GPIOA->REGS(IDR) >> 9) & 0x03) | ((GPIOA->REGS(IDR) & 0x0F) << 4) | ((GPIOB->REGS(IDR) >> 4) & 0x0C) )
// Direction: rewrite the CRH/CRL fields of exactly the data pins
#define setWriteDir() { \
        GP_OUT(GPIOA, CRH, 0xFF0); \
        GP_OUT(GPIOA, CRL, 0xFFFF); \
        GP_OUT(GPIOB, CRL, 0xFF000000); \
    }
#define setReadDir()  { \
        GP_INP(GPIOA, CRH, 0xFF0); \
        GP_INP(GPIOA, CRL, 0xFFFF); \
        GP_INP(GPIOB, CRL, 0xFF000000); \
    }

#define write8(x)     { write_8(x); WRITE_DELAY; WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; READ_DELAY; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

//####################################### ADIGITALEU_TEENSY ############################
//UNTESTED
#elif defined(__MK66FX1M0__) && defined(USE_ADIGITALEU_TEENSY)  // 16bit on a Teensy 3.6
#warning "Teensy 3.6 16bit port C & D only (for now)"
// Note: Port usage explained in UTFT Teensy edition ...\libraries\UTFT\hardware\arm\HW_Teensy3.h"

#define USES_16BIT_BUS

// Extra bus timing inserted before the write strobe / before sampling a read
#define WRITE_DELAY { WR_ACTIVE8; }
#define READ_DELAY  { RD_ACTIVE16; }

// Control lines (Kinetis GPIO port / bit, Teensy pin and signal in the comment)
#define RD_PORT GPIOA
#define RD_PIN 16       //28 RD
#define WR_PORT GPIOA
#define WR_PIN 5        //25 WR
#define CD_PORT GPIOE
#define CD_PIN 26       //24 RS 
#define CS_PORT GPIOA
#define CS_PIN 14       //26 CS
#define RESET_PORT GPIOA
#define RESET_PIN 15    //27 Reset

// Data bus: low byte on GPIOC bits 0-7, high byte on GPIOD bits 0-7
// (the same bits that setWriteDir()/setReadDir() switch).
// The macro argument is parenthesised so that an expression argument such as
// (a | b) is shifted as a whole.
// NOTE(review): the complete PDOR register is assigned, so output bits above
// bit 7 of each port are overwritten too - confirm this is acceptable for the wiring.
#define write_8(d) { GPIOC_PDOR = (d); }
#define write_16(d) { GPIOC_PDOR = (d); GPIOD_PDOR = ((d) >> 8); }

// Only the eight data bits of each port are returned; unrelated port pins
// must not leak into the result when it is stored in a 16-bit variable.
#define read_8() (GPIOC_PDIR & 0xFF)
#define read_16() ((GPIOC_PDIR & 0xFF) | ((GPIOD_PDIR & 0xFF) << 8))

#define setWriteDir() {GPIOC_PDDR |=  0xFF; GPIOD_PDDR |=  0xFF; }
#define setReadDir()  {GPIOC_PDDR &= ~0xFF; GPIOD_PDDR &= ~0xFF; }

#define write8(x)     {write_8(x); WRITE_DELAY; WR_STROBE; }
#define write16(x)    {write_16(x); WRITE_DELAY; WR_STROBE; }

#define READ_8(dst) { RD_STROBE; READ_DELAY; dst = read_8(); RD_IDLE; }
#define READ_16(dst) { RD_STROBE; READ_DELAY; dst = read_16(); RD_IDLE;}

//Data: Teensy pins -> D0-D15 :
// Teensy probably initialises some pins for Analog, Timer, Alternate, ...
// so it is probably wise to use pinMode(n, OUTPUT) for all the control and data lines
#define GPIO_INIT() {pinMode(2, OUTPUT); for (int i = 5; i <= 15; i++) pinMode(i, OUTPUT); for (int i = 20; i <= 28; i++) pinMode(i, OUTPUT);}

// PASTE is kept as a separate step so that a port macro such as RD_PORT is
// expanded to GPIOx before it is glued to the register suffix.
#define PASTE(x, y) x ## y

#define PIN_LOW(port, pin) PASTE(port, _PCOR) = (1<<(pin))
#define PIN_HIGH(port, pin) PASTE(port, _PSOR) = (1<<(pin))
#define PIN_OUTPUT(port, pin) PASTE(port, _PDDR) |= (1<<(pin))

#else
#define USE_SPECIAL_FAIL
#endif

And mcufriend_shield.h is as below:

#define USE_SPECIAL             //check for custom drivers

// Delay helpers: each macro repeats the basic WR_ACTIVE / RD_ACTIVE operation
// the number of times given in its name, built by doubling the previous one.
// WR_ACTIVE and RD_ACTIVE themselves are not defined in this part of the file.
#define WR_ACTIVE2  {WR_ACTIVE; WR_ACTIVE;}
#define WR_ACTIVE4  {WR_ACTIVE2; WR_ACTIVE2;}
#define WR_ACTIVE8  {WR_ACTIVE4; WR_ACTIVE4;}
#define RD_ACTIVE2  {RD_ACTIVE; RD_ACTIVE;}
#define RD_ACTIVE4  {RD_ACTIVE2; RD_ACTIVE2;}
#define RD_ACTIVE8  {RD_ACTIVE4; RD_ACTIVE4;}
#define RD_ACTIVE16 {RD_ACTIVE8; RD_ACTIVE8;}

#if defined(USE_SPECIAL)
#include "mcufriend_special.h"
#if !defined(USE_SPECIAL_FAIL)
#warning WE ARE USING A SPECIAL CUSTOM DRIVER
#endif
#endif
#if !defined(USE_SPECIAL) || defined (USE_SPECIAL_FAIL)

#if 0
//################################### UNO ##############################
#elif defined(__AVR_ATmega328P__)       //regular UNO shield on UNO
// Control lines, all on PORTC
#define RD_PORT     PORTC
#define RD_PIN      0
#define WR_PORT     PORTC
#define WR_PIN      1
#define CD_PORT     PORTC
#define CD_PIN      2
#define CS_PORT     PORTC
#define CS_PIN      3
#define RESET_PORT  PORTC
#define RESET_PIN   4

// Data bits keep their bit number: bits 0-1 on PORTB, bits 2-7 on PORTD
#define BMASK       0x03              //more intuitive style for mixed Ports
#define DMASK       0xFC              //does exactly the same as previous
// Read-modify-write each port so that non-data pins keep their state
#define write_8(x)  { \
        PORTB = (PORTB & ~BMASK) | ((x) & BMASK); \
        PORTD = (PORTD & ~DMASK) | ((x) & DMASK); \
    }
#define read_8()      ( (PINB & BMASK) | (PIND & DMASK) )
#define setWriteDir() { DDRB |=  BMASK; DDRD |=  DMASK; }
#define setReadDir()  { DDRB &= ~BMASK; DDRD &= ~DMASK; }
#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// PIN_OUTPUT modifies the register located one address below the PORT register
#define PIN_LOW(p, b)        (p) &= ~(1 << (b))
#define PIN_HIGH(p, b)       (p) |= (1 << (b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1 << (b))

//################################### MEGA2560 ##############################
#elif defined(__AVR_ATmega2560__) || defined(__AVR_ATmega1280__)       //regular UNO shield on MEGA2560
// Control lines, all on PORTF
#define RD_PORT     PORTF
#define RD_PIN      0
#define WR_PORT     PORTF
#define WR_PIN      1
#define CD_PORT     PORTF
#define CD_PIN      2
#define CS_PORT     PORTF
#define CS_PIN      3
#define RESET_PORT  PORTF
#define RESET_PIN   4

// Data-bus pins per port: PE3-PE5, PG5, PH3-PH6
#define EMASK       0x38
#define GMASK       0x20
#define HMASK       0x78
// Clear all data pins, then OR in the data bits:
//   bits 0,1 -> PH5,PH6   bits 2,3 -> PE4,PE5   bit 4 -> PG5
//   bit 5    -> PE3       bits 6,7 -> PH3,PH4
#define write_8(x)  { \
        PORTH &= ~HMASK; \
        PORTG &= ~GMASK; \
        PORTE &= ~EMASK; \
        PORTH |= (((x) & 0x03) << 5); \
        PORTE |= (((x) & 0x0C) << 2); \
        PORTG |= (((x) & 0x10) << 1); \
        PORTE |= (((x) & 0x20) >> 2); \
        PORTH |= (((x) & 0xC0) >> 3); \
    }

// Inverse mapping of write_8(), sampled from the PINx registers
#define read_8()    ( ((PINH >> 5) & 0x03) \
                    | ((PINE >> 2) & 0x0C) \
                    | ((PING >> 1) & 0x10) \
                    | ((PINE & 0x08) << 2) \
                    | ((PINH & 0x18) << 3) )
#define setWriteDir() { DDRH |=  HMASK; DDRG |=  GMASK; DDRE |=  EMASK; }
#define setReadDir()  { DDRH &= ~HMASK; DDRG &= ~GMASK; DDRE &= ~EMASK; }
#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// PIN_OUTPUT modifies the register located one address below the PORT register
#define PIN_LOW(p, b)        (p) &= ~(1 << (b))
#define PIN_HIGH(p, b)       (p) |= (1 << (b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1 << (b))

//################################# ZERO and M0_PRO ############################
#elif defined(__SAMD21G18A__)   //regular UNO shield on ZERO or M0_PRO
#include "sam.h"
 // configure macros for the control pins
// Control lines (PORT group / bit)
#define RD_PORT     PORT->Group[0]
#define RD_PIN      2
#define WR_PORT     PORT->Group[1]
#define WR_PIN      8
#define CD_PORT     PORT->Group[1]
#define CD_PIN      9
#define CS_PORT     PORT->Group[0]
#define CS_PIN      4
#define RESET_PORT  PORT->Group[0]
#define RESET_PIN   5
// All eight data pins live in PORT->Group[0]: bits 6-9, 14-15 and 20-21
#define DMASK 0x0030C3C0
 //  #define write_8(x) PORT->Group[0].OUT.reg = (PORT->Group[0].OUT.reg & ~DMASK)|(((x) & 0x0F) << 6)|(((x) & 0x30) << 10)|(((x) & 0xC0)<<14)
// NOTE(review): both operands of this test name the same symbol; possibly a
// second board macro was intended - confirm.
#if defined(ARDUINO_SAMD_ZERO) || defined(ARDUINO_SAMD_ZERO)   // American ZERO
// data bits 0,1,3 -> port bits 6,7,9; bit 2 -> 14; bit 4 -> 8; bit 5 -> 15; bits 6,7 -> 20,21
#define write_8(x) { \
        PORT->Group[0].OUTCLR.reg = DMASK; \
        PORT->Group[0].OUTSET.reg = (((x) & 0x0B) << 6) \
                                  | (((x) & 0x04) << 12) \
                                  | (((x) & 0x10) << 4) \
                                  | (((x) & 0x20) << 10) \
                                  | (((x) & 0xC0) << 14); \
    }
#define read_8()   ( ((PORT->Group[0].IN.reg & 0x0002C0) >> 6) \
                   | ((PORT->Group[0].IN.reg & 0x004000) >> 12) \
                   | ((PORT->Group[0].IN.reg & 0x000100) >> 4) \
                   | ((PORT->Group[0].IN.reg & 0x008000) >> 10) \
                   | ((PORT->Group[0].IN.reg & 0x300000) >> 14) )
#else   //default to an M0_PRO on v1.6.5 or 1.7.6
// data bits 0..3 -> port bits 6..9; bits 4,5 -> 14,15; bits 6,7 -> 20,21
#define write_8(x) { \
        PORT->Group[0].OUTCLR.reg = DMASK; \
        PORT->Group[0].OUTSET.reg = (((x) & 0x0F) << 6) \
                                  | (((x) & 0x30) << 10) \
                                  | (((x) & 0xC0) << 14); \
    }
#define read_8()   ( ((PORT->Group[0].IN.reg & 0x0003C0) >> 6) \
                   | ((PORT->Group[0].IN.reg & 0x00C000) >> 10) \
                   | ((PORT->Group[0].IN.reg & 0x300000) >> 14) )
#endif
// Direction change: DIRSET/DIRCLR with the data mask, followed by two
// WRCONFIG writes (lower 16 pins, then upper 16 pins with bit 31 set).
// The read variant additionally sets bit 17 in both WRCONFIG writes.
#define setWriteDir() { \
        PORT->Group[0].DIRSET.reg = DMASK; \
        PORT->Group[0].WRCONFIG.reg = (DMASK & 0xFFFF) | (0<<22) | (1<<28) | (1<<30); \
        PORT->Group[0].WRCONFIG.reg = (DMASK>>16) | (0<<22) | (1<<28) | (1<<30) | (1<<31); \
    }
#define setReadDir()  { \
        PORT->Group[0].DIRCLR.reg = DMASK; \
        PORT->Group[0].WRCONFIG.reg = (DMASK & 0xFFFF) | (1<<17) | (1<<28) | (1<<30); \
        PORT->Group[0].WRCONFIG.reg = (DMASK>>16) | (1<<17) | (1<<28) | (1<<30) | (1<<31); \
    }
#define write8(x)     { write_8(x); WR_ACTIVE; WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }
// Shield Control macros.
#define PIN_LOW(port, pin)    (port).OUTCLR.reg = (1 << (pin))
#define PIN_HIGH(port, pin)   (port).OUTSET.reg = (1 << (pin))
#define PIN_OUTPUT(port, pin) (port).DIR.reg |= (1 << (pin))

//####################################### DUE ############################
#elif defined(__SAM3X8E__)      //regular UNO shield on DUE
 // configure macros for the control pins
// Control lines, all on PIOA
#define RD_PORT     PIOA
#define RD_PIN      16
#define WR_PORT     PIOA
#define WR_PIN      24
#define CD_PORT     PIOA
#define CD_PIN      23
#define CS_PORT     PIOA
#define CS_PIN      22
#define RESET_PORT  PIOA
#define RESET_PIN   6
// Data-bus pins: PB25 plus PC21-PC26 and PC28
#define BMASK       (1 << 25)
#define CMASK       ((0x3F << 21) | (1 << 28))
// Clear every data pin, then set PB25 and the PIOC pins in one write each:
//   bit0->PC22 bit1->PC21 bit2->PB25 bit3->PC28 bit4->PC26 bit5->PC25 bit6->PC24 bit7->PC23
#define write_8(x)  { \
        PIOB->PIO_CODR = BMASK; \
        PIOC->PIO_CODR = CMASK; \
        PIOB->PIO_SODR = (((x) & 0x04) << 23); \
        PIOC->PIO_SODR = (((x) & 0x01) << 22) \
                       | (((x) & 0x02) << 20) \
                       | (((x) & 0x08) << 25) \
                       | (((x) & 0x10) << 22) \
                       | (((x) & 0x20) << 20) \
                       | (((x) & 0x40) << 18) \
                       | (((x) & 0x80) << 16); \
    }

// Inverse mapping of write_8(); one PIO_PDSR sample per data bit
#define read_8()    ( ((PIOC->PIO_PDSR >> 22) & 0x01) \
                    | ((PIOC->PIO_PDSR >> 20) & 0x02) \
                    | ((PIOB->PIO_PDSR >> 23) & 0x04) \
                    | ((PIOC->PIO_PDSR >> 25) & 0x08) \
                    | ((PIOC->PIO_PDSR >> 22) & 0x10) \
                    | ((PIOC->PIO_PDSR >> 20) & 0x20) \
                    | ((PIOC->PIO_PDSR >> 18) & 0x40) \
                    | ((PIOC->PIO_PDSR >> 16) & 0x80) )
// Direction control: masks go to PIO_OER (write) or PIO_ODR (read);
// the read path first writes the PIOB and PIOC IDs to PMC_PCER0.
#define setWriteDir() { \
        PIOB->PIO_OER = BMASK; \
        PIOC->PIO_OER = CMASK; \
    }
#define setReadDir()  { \
        PMC->PMC_PCER0 = (1 << ID_PIOB) | (1 << ID_PIOC); \
        PIOB->PIO_ODR = BMASK; \
        PIOC->PIO_ODR = CMASK; \
    }
#define write8(x)     { write_8(x); WR_ACTIVE2; WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; RD_ACTIVE; dst = read_8(); RD_IDLE; RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }
// Shield Control macros: one bit written to PIO_CODR / PIO_SODR / PIO_OER
#define PIN_LOW(port, pin)    (port)->PIO_CODR = (1 << (pin))
#define PIN_HIGH(port, pin)   (port)->PIO_SODR = (1 << (pin))
#define PIN_OUTPUT(port, pin) (port)->PIO_OER = (1 << (pin))

//################################### LEONARDO ##############################
#elif defined(__AVR_ATmega32U4__)       //regular UNO shield on Leonardo
// Control lines, all on PORTF
#define RD_PORT PORTF
#define RD_PIN  7
#define WR_PORT PORTF
#define WR_PIN  6
#define CD_PORT PORTF
#define CD_PIN  5
#define CS_PORT PORTF
#define CS_PIN  4
#define RESET_PORT PORTF
#define RESET_PIN  1

// Data-bus pins per port: PB4-PB5, PC6, PD0, PD1, PD4, PD7 and PE6
#define BMASK         (3<<4)
#define CMASK         (1<<6)
#define DMASK         ((1<<7)|(1<<4)|(3<<0))
#define EMASK         (1<<6)
// Put one byte on the data bus.
// All data pins are cleared first, then each port is OR-ed with the data
// bits that belong to it:
//   bits 0,1 -> PB4,PB5   bit 2 -> PD1   bit 3 -> PD0   bit 4 -> PD4
//   bit 5 -> PC6          bit 6 -> PD7   bit 7 -> PE6
// Unlike the other branches this is a real function, not a macro.
static inline                   //hope we use r24
void write_8(uint8_t x)
{
    PORTB &= ~BMASK;
    PORTC &= ~CMASK;
    PORTD &= ~DMASK;
    PORTE &= ~EMASK;
    PORTB |= (((x) & (3 << 0)) << 4);
    PORTD |= (((x) & (1 << 2)) >> 1);
    PORTD |= (((x) & (1 << 3)) >> 3);
    PORTD |= (((x) & (1 << 4)) << 0);
    PORTC |= (((x) & (1 << 5)) << 1);
    PORTD |= (((x) & (1 << 6)) << 1);
    PORTE |= (((x) & (1 << 7)) >> 1);
}

// Inverse mapping of write_8(), sampled from the PINx registers
#define read_8()      ( ((PINB & (3<<4)) >> 4)\
| ((PIND & (1<<1)) << 1)\
| ((PIND & (1<<0)) << 3)\
| ((PIND & (1<<4)) >> 0)\
| ((PINC & (1<<6)) >> 1)\
| ((PIND & (1<<7)) >> 1)\
| ((PINE & (1<<6)) << 1)\
)
#define setWriteDir() { DDRB |=  BMASK; DDRC |=  CMASK; DDRD |=  DMASK; DDRE |=  EMASK;  }
#define setReadDir()  { DDRB &= ~BMASK; DDRC &= ~CMASK; DDRD &= ~DMASK; DDRE &= ~EMASK;  }
#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// PIN_OUTPUT modifies the register located one address below the PORT register
#define PIN_LOW(p, b)        (p) &= ~(1<<(b))
#define PIN_HIGH(p, b)       (p) |= (1<<(b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1<<(b))

//################################### UNO SHIELD on BOBUINO ##############################
#elif defined(__AVR_ATmega1284P__) || defined(__AVR_ATmega644P__) //UNO shield on BOBUINO
#warning regular UNO shield on BOBUINO
// Control lines, all on PORTA
#define RD_PORT PORTA
#define RD_PIN  7
#define WR_PORT PORTA
#define WR_PIN  6
#define CD_PORT PORTA
#define CD_PIN  5
#define CS_PORT PORTA
#define CS_PIN  4
#define RESET_PORT PORTA
#define RESET_PIN  3

// Data-bus pins: bits 4-7 on PB0-PB3, bits 2,3 on PD2,PD3, bits 0,1 on PD5,PD6
#define BMASK         0x0F              //
#define DMASK         0x6C              //
// Read-modify-write both ports so that non-data pins keep their state.
// The high nibble is masked with BMASK so that an argument wider than 8 bits
// can never reach PB4-PB7.
#define write_8(x)    { PORTB = (PORTB & ~BMASK) | (((x) >> 4) & BMASK); \
        PORTD = (PORTD & ~DMASK) | ((x) & 0x0C) | (((x) & 0x03) << 5); }
// PINB is masked before the shift: READ_16() stores read_8() into a 16-bit
// destination, where unmasked PB4-PB7 would otherwise show up as bits 8-11.
#define read_8()      ( ((PINB & BMASK) << 4) | (PIND & 0x0C) | ((PIND & 0x60) >> 5) )
#define setWriteDir() { DDRB |=  BMASK; DDRD |=  DMASK; }
#define setReadDir()  { DDRB &= ~BMASK; DDRD &= ~DMASK; }
#define write8(x)     { write_8(x); WR_STROBE; }
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
#define READ_8(dst)   { RD_STROBE; dst = read_8(); RD_IDLE; }
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// PIN_OUTPUT modifies the register located one address below the PORT register
#define PIN_LOW(p, b)        (p) &= ~(1<<(b))
#define PIN_HIGH(p, b)       (p) |= (1<<(b))
#define PIN_OUTPUT(p, b)     *(&p-1) |= (1<<(b))

//####################################### TEENSY ############################
#elif defined(__MK20DX128__) || defined(__MK20DX256__) || defined(__MK64FX512__) || defined(__MK66FX1M0__) // regular UNO shield on a Teensy 3.x
#warning regular UNO shield on a Teensy 3.x

// Per-chip bus timing: WRITE_DELAY / READ_DELAY are built from the
// WR_ACTIVEn / RD_ACTIVEn repeat macros. A chip that is not listed here
// stops the build instead of running with unknown timing.
#if defined(__MK20DX128__) || defined(__MK20DX256__) // Teensy3.0 || 3.2 96MHz
#define WRITE_DELAY { WR_ACTIVE2; }
#define READ_DELAY  { RD_ACTIVE8; RD_ACTIVE; }
#elif defined(__MK64FX512__) // Teensy3.5 120MHz thanks to PeteJohno
#define WRITE_DELAY { WR_ACTIVE4; }
#define READ_DELAY  { RD_ACTIVE8; }
#elif defined(__MK66FX1M0__) // Teensy3.6 180MHz untested.   delays can possibly be reduced.
#define WRITE_DELAY { WR_ACTIVE8; }
#define READ_DELAY  { RD_ACTIVE16; }
#else
#error unspecified delays
#endif

// Control lines (Kinetis GPIO port / bit)
#define RD_PORT     GPIOD
#define RD_PIN      1
#define WR_PORT     GPIOC
#define WR_PIN      0
#define CD_PORT     GPIOB
#define CD_PIN      0
#define CS_PORT     GPIOB
#define CS_PIN      1
#define RESET_PORT  GPIOB
#define RESET_PIN   3

// Data-bus pins: PA12-PA13, PC3 and PD0, PD2, PD3, PD4, PD7
#define AMASK       (0x3 << 12)
#define CMASK       (1 << 3)
#define DMASK       0x9D

// Clear every data pin via PCOR, then set the required ones via PSOR:
//   bit0->PD3 bit1->PC3 bit2->PD0 bit3->PA12 bit4->PA13 bit5->PD7 bit6->PD4 bit7->PD2
#define write_8(d) { \
        GPIOA_PCOR = AMASK; \
        GPIOC_PCOR = CMASK; \
        GPIOD_PCOR = DMASK; \
        GPIOA_PSOR = (((d) & 0x08) << 9) \
                   | (((d) & 0x10) << 9); \
        GPIOC_PSOR = (((d) & 0x02) << 2); \
        GPIOD_PSOR = (((d) & 0x01) << 3) \
                   | (((d) & 0x04) >> 2) \
                   | (((d) & 0x20) << 2) \
                   | (((d) & 0x40) >> 2) \
                   | (((d) & 0x80) >> 5); \
    }
// Inverse mapping of write_8(); one PDIR sample per data bit
#define read_8() ( ((GPIOD_PDIR & 0x0008) >> 3) \
                 | ((GPIOC_PDIR & 0x0008) >> 2) \
                 | ((GPIOD_PDIR & 0x0001) << 2) \
                 | ((GPIOA_PDIR & 0x1000) >> 9) \
                 | ((GPIOA_PDIR & 0x2000) >> 9) \
                 | ((GPIOD_PDIR & 0x0080) >> 2) \
                 | ((GPIOD_PDIR & 0x0010) << 2) \
                 | ((GPIOD_PDIR & 0x0004) << 5) )
#define setWriteDir() { GPIOA_PDDR |=  AMASK; GPIOC_PDDR |=  CMASK; GPIOD_PDDR |=  DMASK; }
#define setReadDir()  { GPIOA_PDDR &= ~AMASK; GPIOC_PDDR &= ~CMASK; GPIOD_PDDR &= ~DMASK; }
#define write8(x) { write_8(x); WRITE_DELAY; WR_STROBE; } //PJ adjusted
// 16-bit transfers are two 8-bit transfers, high byte first
#define write16(x) { uint8_t h = (x) >> 8; uint8_t l = x; write8(h); write8(l); }
#define READ_8(dst) { RD_STROBE; READ_DELAY; dst = read_8(); RD_IDLE; } //PJ adjusted
#define READ_16(dst) { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }
//#define GPIO_INIT() {SIM_SCGC5 |= 0x3E00;}  //PORTA-PORTE
// Uses pinMode() on pins 2..9 and A0..A4
#define GPIO_INIT() { \
        for (int i = 2; i <= 9; i++) pinMode(i, OUTPUT); \
        for (int i = A0; i <= A4; i++) pinMode(i, OUTPUT); \
    }

// PASTE is kept as a separate step so that a port macro such as RD_PORT is
// expanded to GPIOx before it is glued to the register suffix.
#define PASTE(x, y) x ## y

#define PIN_LOW(port, pin)    PASTE(port, _PCOR) = (1 << (pin))
#define PIN_HIGH(port, pin)   PASTE(port, _PSOR) = (1 << (pin))
#define PIN_OUTPUT(port, pin) PASTE(port, _PDDR) |= (1 << (pin))

//####################################### STM32 ############################
// NUCLEO:   ARDUINO_NUCLEO_xxxx from ST Core or ARDUINO_STM_NUCLEO_F103RB from MapleCore
// BLUEPILL: ARDUINO_NUCLEO_F103C8 / ARDUINO_BLUEPILL_F103C8 from ST Core or ARDUINO_GENERIC_STM32F103C from MapleCore
// MAPLE_REV3: n/a from ST Core or ARDUINO_MAPLE_REV3 from MapleCore
// ST Core:   ARDUINO_ARCH_STM32
// MapleCore: __STM32F1__
#elif defined(__STM32F1__) || defined(ARDUINO_ARCH_STM32)   //MapleCore or ST Core
// IS_NUCLEO64 is 1 when one of the listed 64-pin Nucleo board macros is
// defined, otherwise 0, so it can be used directly in `#if IS_NUCLEO64`.
// The board test is evaluated here, in a real #if: a macro whose expansion
// produces `defined(...)` inside a later #if has undefined behaviour
// (GCC reports it with -Wexpansion-to-defined).
#if ( defined(ARDUINO_STM_NUCLEO_F103RB) \
   || defined(ARDUINO_NUCLEO_F030R8) || defined(ARDUINO_NUCLEO_F091RC) \
   || defined(ARDUINO_NUCLEO_F103RB) || defined(ARDUINO_NUCLEO_F303RE) \
   || defined(ARDUINO_NUCLEO_F401RE) || defined(ARDUINO_NUCLEO_F411RE) \
   || defined(ARDUINO_NUCLEO_F446RE) || defined(ARDUINO_NUCLEO_L053R8) \
   || defined(ARDUINO_NUCLEO_L152RE) || defined(ARDUINO_NUCLEO_L476RG) \
    )
#define IS_NUCLEO64 1
#else
#define IS_NUCLEO64 0
#endif
// F1xx, F4xx, L4xx have different registers and styles.  General Macros
// REGS() hides the difference in how the two cores expose GPIO registers:
// the Maple core goes through a `regs` member, the ST core accesses them directly.
#if defined(__STM32F1__)   //weird Maple Core
#define REGS(x) regs->x
#else                      //regular ST Core
#define REGS(x) x
#endif
// BSRR: writing bit `pin` drives the pin high, writing bit `pin`+16 drives it low.
#define PIN_HIGH(port, pin)   (port)-> REGS(BSRR) = (1<<(pin))
#define PIN_LOW(port, pin)    (port)-> REGS(BSRR) = (1<<((pin)+16))
// PIN_MODE2 replaces the 2-bit field of `pin` inside `reg` with `mode`.
// `mode` is parenthesised so that an expression argument is shifted as a whole.
#define PIN_MODE2(reg, pin, mode) reg=(reg&~(0x3<<((pin)<<1)))|((mode)<<((pin)<<1))
// GROUP_MODE replaces, in register `reg` of `port`, the bits selected by
// `mask` with the corresponding bits of `val`; all other bits are kept.
#define GROUP_MODE(port, reg, mask, val)  {(port)->REGS(reg) = ((port)->REGS(reg) & ~(mask)) | ((mask)&(val)); }

// Family specific Macros.  F103 needs ST and Maple compatibility
// note that ILI9320 class of controller has much slower Read cycles
#if 0
#elif defined(__STM32F1__) || defined(ARDUINO_NUCLEO_F103C8) || defined(ARDUINO_BLUEPILL_F103C8) || defined(ARDUINO_NUCLEO_F103RB)
// F103 timing: nothing extra before the write strobe, one RD_ACTIVE before sampling a read
#define WRITE_DELAY { }
#define READ_DELAY  { RD_ACTIVE; }
// GPIO_INIT is empty on the Maple core; on the ST core it sets the
// IOPA..IOPD and AFIO enable bits in RCC->APB2ENR and then sets
// AFIO_MAPR_SWJ_CFG_1 in AFIO->MAPR.
#if defined(__STM32F1__)  //MapleCore crts.o does RCC.  not understand regular syntax anyway
#define GPIO_INIT()      
#else
#define GPIO_INIT()   { RCC->APB2ENR |= RCC_APB2ENR_IOPAEN | RCC_APB2ENR_IOPBEN | RCC_APB2ENR_IOPCEN | RCC_APB2ENR_IOPDEN | RCC_APB2ENR_AFIOEN; \
        AFIO->MAPR |= AFIO_MAPR_SWJ_CFG_1;}
#endif
// GP_OUT / GP_INP load the selected 4-bit pin fields with 0x3 / 0x4
// (presumably push-pull output / floating input - confirm against the reference manual).
#define GP_OUT(port, reg, mask)           GROUP_MODE(port, reg, mask, 0x33333333)
#define GP_INP(port, reg, mask)           GROUP_MODE(port, reg, mask, 0x44444444)
// Single-pin variants: pins 0-7 are configured in CRL, pins 8-15 in CRH,
// four configuration bits per pin.
#define PIN_OUTPUT(port, pin) {\
        if (pin < 8) {GP_OUT(port, CRL, 0xF<<((pin)<<2));} \
        else {GP_OUT(port, CRH, 0xF<<((pin&7)<<2));} \
    }
#define PIN_INPUT(port, pin) { \
        if (pin < 8) { GP_INP(port, CRL, 0xF<<((pin)<<2)); } \
        else { GP_INP(port, CRH, 0xF<<((pin&7)<<2)); } \
    }

// should be easy to add F030, F091, F303, L053, ...
#elif defined(STM32F030x8)
#define WRITE_DELAY { }
#define READ_DELAY  { RD_ACTIVE; }
#define GPIO_INIT()   { RCC->AHBENR |= RCC_AHBENR_GPIOAEN | RCC_AHBENR_GPIOBEN | RCC_AHBENR_GPIOCEN; }
#define PIN_OUTPUT(port, pin) PIN_MODE2((port)->MODER, pin, 0x1)

#elif defined(STM32F091xC)
#define WRITE_DELAY { }
#define READ_DELAY  { RD_ACTIVE; }
#define GPIO_INIT()   { RCC->AHBENR |= RCC_AHBENR_GPIOAEN | RCC_AHBENR_GPIOBEN | RCC_AHBENR_GPIOCEN; }
#define PIN_OUTPUT(port, pin) PIN_MODE2((port)->MODER, pin, 0x1)

#elif defined(STM32F303xE)
// F303 timing: nothing extra before the write strobe, one RD_ACTIVE before sampling a read
#define WRITE_DELAY { }
#define READ_DELAY  { RD_ACTIVE; }
// Sets the GPIOA..GPIOC enable bits in RCC->AHBENR
#define GPIO_INIT()   { RCC->AHBENR |= RCC_AHBENR_GPIOAEN | RCC_AHBENR_GPIOBEN | RCC_AHBENR_GPIOCEN; \
                      /* AFIO->MAPR |= AFIO_MAPR_SWJ_CFG_1; */ }
// Every other MODER-based family in this chain provides PIN_OUTPUT; without
// it the control pins cannot be configured when building for an F303.
#define PIN_OUTPUT(port, pin) PIN_MODE2((port)->MODER, pin, 0x1)

// The remaining families differ only in which RCC register enables the GPIO
// clocks and in how many extra WR_ACTIVE / RD_ACTIVE repeats pad the strobes
// (WR_ACTIVEn / RD_ACTIVEn are defined elsewhere).
#elif defined(STM32F401xE)
#define WRITE_DELAY { WR_ACTIVE2; }
#define READ_DELAY  { RD_ACTIVE4; }
#define GPIO_INIT()   { RCC->AHB1ENR |= RCC_AHB1ENR_GPIOAEN | RCC_AHB1ENR_GPIOBEN | RCC_AHB1ENR_GPIOCEN; }
#define PIN_OUTPUT(port, pin) PIN_MODE2((port)->MODER, pin, 0x1)

#elif defined(STM32F411xE)
#define WRITE_DELAY { WR_ACTIVE2; WR_ACTIVE; }
#define READ_DELAY  { RD_ACTIVE4; RD_ACTIVE2; }
#define GPIO_INIT()   { RCC->AHB1ENR |= RCC_AHB1ENR_GPIOAEN | RCC_AHB1ENR_GPIOBEN | RCC_AHB1ENR_GPIOCEN; }
#define PIN_OUTPUT(port, pin) PIN_MODE2((port)->MODER, pin, 0x1)

#elif defined(STM32F446xx)
#define WRITE_DELAY { WR_ACTIVE8; }
#define READ_DELAY  { RD_ACTIVE16;}
#define GPIO_INIT()   { RCC->AHB1ENR |= RCC_AHB1ENR_GPIOAEN | RCC_AHB1ENR_GPIOBEN | RCC_AHB1ENR_GPIOCEN; }
#define PIN_OUTPUT(port, pin) PIN_MODE2((port)->MODER, pin, 0x1)

#elif defined(STM32L053xx)
// L053 enables its GPIO clocks through RCC->IOPENR.
#define WRITE_DELAY { }
#define READ_DELAY  { RD_ACTIVE; }
#define GPIO_INIT()   { RCC->IOPENR |= RCC_IOPENR_GPIOAEN | RCC_IOPENR_GPIOBEN | RCC_IOPENR_GPIOCEN; }
#define PIN_OUTPUT(port, pin) PIN_MODE2((port)->MODER, pin, 0x1)

#elif defined(STM32L152xE)
#define WRITE_DELAY { }
#define READ_DELAY  { RD_ACTIVE; }
#define GPIO_INIT()   { RCC->AHBENR |= RCC_AHBENR_GPIOAEN | RCC_AHBENR_GPIOBEN | RCC_AHBENR_GPIOCEN; }
#define PIN_OUTPUT(port, pin) PIN_MODE2((port)->MODER, pin, 0x1)

#elif defined(STM32L476xx)
// L476 enables its GPIO clocks through RCC->AHB2ENR.
#define WRITE_DELAY { WR_ACTIVE2; }
#define READ_DELAY  { RD_ACTIVE4; RD_ACTIVE; }
#define GPIO_INIT()   { RCC->AHB2ENR |= RCC_AHB2ENR_GPIOAEN | RCC_AHB2ENR_GPIOBEN | RCC_AHB2ENR_GPIOCEN; }
#define PIN_OUTPUT(port, pin) PIN_MODE2((port)->MODER, pin, 0x1)

#else
// No family matched: stop the build rather than continue without delay/init macros.
#error unsupported STM32
#endif

// Board wiring selection.  As above, "#if 0" only exists so that each board
// can be an "#elif".
#if 0
#elif defined(ARDUINO_GENERIC_STM32F103C) || defined(ARDUINO_NUCLEO_F103C8) || defined(ARDUINO_BLUEPILL_F103C8)
#warning Uno Shield on BLUEPILL
// Control lines all live on GPIOB: RD=PB0, WR=PB6, CD=PB7, CS=PB8, RESET=PB9.
#define RD_PORT GPIOB
//#define RD_PIN  5
#define RD_PIN  0  //hardware mod to Adapter.  Allows use of PB5 for SD Card
#define WR_PORT GPIOB
#define WR_PIN  6
#define CD_PORT GPIOB
#define CD_PIN  7
#define CS_PORT GPIOB
#define CS_PIN  8
#define RESET_PORT GPIOB
#define RESET_PIN  9

// configure macros for the data pins
// The 8 data lines are PA0..PA7, so a byte maps straight onto the low byte of
// GPIOA: write_8 writes 0x00FF<<16 to BSRR (the reset half, presumably
// clearing PA0..PA7) and then writes the value's low byte to the set half.
#define write_8(d)    { GPIOA->REGS(BSRR) = 0x00FF << 16; GPIOA->REGS(BSRR) = (d) & 0xFF; }
// read_8 returns the low byte of GPIOA's input data register.
#define read_8()      (GPIOA->REGS(IDR) & 0xFF)
//                                         PA7 ..PA0
// Switch all eight CRL pin fields of GPIOA between output and input mode.
#define setWriteDir() {GP_OUT(GPIOA, CRL, 0xFFFFFFFF); }
#define setReadDir()  {GP_INP(GPIOA, CRL, 0xFFFFFFFF); }

#elif IS_NUCLEO64 // Uno Shield on NUCLEO
#warning Uno Shield on NUCLEO
// Control lines: RD=PA0, WR=PA1, CD=PA4, CS=PB0, RESET=PC1.
#define RD_PORT GPIOA
#define RD_PIN  0
#define WR_PORT GPIOA
#define WR_PIN  1
#define CD_PORT GPIOA
#define CD_PIN  4
#define CS_PORT GPIOB
#define CS_PIN  0
#define RESET_PORT GPIOC
#define RESET_PIN  1

// configure macros for the data pins
// The data bus is scattered over three ports.  As implied by the shifts below:
//   D0->PA9  D1->PC7  D2->PA10  D3->PB3  D4->PB5  D5->PB4  D6->PB10  D7->PA8
// write_8 first writes the mask of all data pins to the upper (reset) half of
// each port's BSRR, then writes the bits that must be high to the lower half.
#define write_8(d) { \
        GPIOA->REGS(BSRR) = 0x0700 << 16; \
        GPIOB->REGS(BSRR) = 0x0438 << 16; \
        GPIOC->REGS(BSRR) = 0x0080 << 16; \
        GPIOA->REGS(BSRR) = (  ((d) & (1<<0)) << 9) \
                            | (((d) & (1<<2)) << 8) \
                            | (((d) & (1<<7)) << 1); \
        GPIOB->REGS(BSRR) = (  ((d) & (1<<3)) << 0) \
                            | (((d) & (1<<4)) << 1) \
                            | (((d) & (1<<5)) >> 1) \
                            | (((d) & (1<<6)) << 4); \
        GPIOC->REGS(BSRR) = (  ((d) & (1<<1)) << 6); \
    }

// read_8 is the inverse mapping: it picks each data pin out of its port's IDR
// and shifts it back to bit positions 0..7 (listed in D0..D7 order).
#define read_8() (       (  (  (GPIOA->REGS(IDR) & (1<<9)) >> 9) \
                            | ((GPIOC->REGS(IDR) & (1<<7)) >> 6) \
                            | ((GPIOA->REGS(IDR) & (1<<10)) >> 8) \
                            | ((GPIOB->REGS(IDR) & (1<<3)) >> 0) \
                            | ((GPIOB->REGS(IDR) & (1<<5)) >> 1) \
                            | ((GPIOB->REGS(IDR) & (1<<4)) << 1) \
                            | ((GPIOB->REGS(IDR) & (1<<10)) >> 4) \
                            | ((GPIOA->REGS(IDR) & (1<<8))  >> 1)))


#if defined(ARDUINO_NUCLEO_F103RB) || defined(ARDUINO_STM_NUCLEO_F103RB) //F103 has unusual GPIO modes
// F103 variant: direction is changed through the 4-bit-per-pin CRL/CRH fields
// using the GP_OUT / GP_INP helpers; each mask nibble selects one data pin.
//                                 PA10,PA9,PA8                       PB10                   PB5,PB4,PB3                             PC7
#define setWriteDir() {GP_OUT(GPIOA, CRH, 0xFFF); GP_OUT(GPIOB, CRH, 0xF00); GP_OUT(GPIOB, CRL, 0xFFF000); GP_OUT(GPIOC, CRL, 0xF0000000); }
#define setReadDir()  {GP_INP(GPIOA, CRH, 0xFFF); GP_INP(GPIOB, CRH, 0xF00); GP_INP(GPIOB, CRL, 0xFFF000); GP_INP(GPIOC, CRL, 0xF0000000); }
#else      //F0xx, F3xx, F4xx, L0xx, L1xx, L4xx use MODER
// MODER variant: two mode bits per pin.  setReadDir clears both bits of every
// data pin; setWriteDir first does the same (via setReadDir) and then sets the
// low bit of each pair, i.e. mode value 0x1.  setWriteDir may reference
// setReadDir before its #define because macros expand at the point of use.
//                                   PA10,PA9,PA8           PB10,PB5,PB4,PB3                      PC7
#define setWriteDir() { setReadDir(); \
                        GPIOA->MODER |=  0x150000; GPIOB->MODER |=  0x100540; GPIOC->MODER |=  0x4000; }
#define setReadDir()  { GPIOA->MODER &= ~0x3F0000; GPIOB->MODER &= ~0x300FC0; GPIOC->MODER &= ~0xC000; }
#endif

#elif defined(ARDUINO_MAPLE_REV3) // Uno Shield on MAPLE_REV3 board
#warning Uno Shield on MAPLE_REV3 board
// Control lines all live on GPIOC: RD=PC0, WR=PC1, CD=PC2, CS=PC3, RESET=PC4.
#define RD_PORT GPIOC
#define RD_PIN  0
#define WR_PORT GPIOC
#define WR_PIN  1
#define CD_PORT GPIOC
#define CD_PIN  2
#define CS_PORT GPIOC
#define CS_PIN  3
#define RESET_PORT GPIOC
#define RESET_PIN  4

// configure macros for the data pins
// Data bus mapping implied by the shifts below:
//   D0->PA10  D1->PB7  D2->PA0  D3->PA1  D4->PB5  D5->PB6  D6->PA8  D7->PA9
// write_8 writes the data-pin masks to the upper (reset) half of BSRR on both
// ports, then writes the bits that must be high to the lower half.
#define write_8(d) { \
        GPIOA->REGS(BSRR) = 0x0703 << 16; \
        GPIOB->REGS(BSRR) = 0x00E0 << 16; \
        GPIOA->REGS(BSRR) = (  ((d) & (1<<0)) << 10) \
                            | (((d) & (1<<2)) >> 2) \
                            | (((d) & (1<<3)) >> 2) \
                            | (((d) & (1<<6)) << 2) \
                            | (((d) & (1<<7)) << 2); \
        GPIOB->REGS(BSRR) = (  ((d) & (1<<1)) << 6) \
                            | (((d) & (1<<4)) << 1) \
                            | (((d) & (1<<5)) << 1); \
    }

// read_8 gathers the same pins from the IDR registers back into bits 0..7
// (listed in D0..D7 order).
#define read_8()  (     (   (  (GPIOA->REGS(IDR) & (1<<10)) >> 10) \
                            | ((GPIOB->REGS(IDR) & (1<<7)) >> 6) \
                            | ((GPIOA->REGS(IDR) & (1<<0)) << 2) \
                            | ((GPIOA->REGS(IDR) & (1<<1)) << 2) \
                            | ((GPIOB->REGS(IDR) & (1<<5)) >> 1) \
                            | ((GPIOB->REGS(IDR) & (1<<6)) >> 1) \
                            | ((GPIOA->REGS(IDR) & (1<<8)) >> 2) \
                            | ((GPIOA->REGS(IDR) & (1<<9)) >> 2)))

// Direction switching through the CRL/CRH fields; each mask nibble selects
// one data pin.
//                                 PA10,PA9,PA8                   PA1,PA0                     PB7,PB6,PB5
#define setWriteDir() {GP_OUT(GPIOA, CRH, 0xFFF); GP_OUT(GPIOA, CRL, 0xFF); GP_OUT(GPIOB, CRL, 0xFFF00000); }
#define setReadDir()  {GP_INP(GPIOA, CRH, 0xFFF); GP_INP(GPIOA, CRL, 0xFF); GP_INP(GPIOB, CRL, 0xFFF00000); }

#else
// No known board wiring matched.
#error REGS group
#endif

// Bus transfer primitives for the STM32 wirings.
// write8: put the byte on the data pins, apply the family's write padding,
// pulse WR, and issue one further WR_IDLE.
#define write8(x)     { write_8(x); WRITE_DELAY; WR_STROBE; WR_IDLE; }
// write16: send the high byte, then the low byte.  The macro declares locals
// named h and l, so the argument expression must not itself use those names.
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
// READ_8: assert RD, apply the family's read padding, sample the bus into
// dst, then release RD (RD_IDLE is issued twice).
#define READ_8(dst)   { RD_STROBE; READ_DELAY; dst = read_8(); RD_IDLE; RD_IDLE; }
// READ_16: two byte reads, first byte becomes the high byte of dst.  Declares
// a local named hi, so dst must not be called hi.
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

//################################### ESP32 ##############################
#elif defined(ESP32)       //regular UNO shield on TTGO D1 R32 (ESP32)
// ESP32 GPIO numbers used for the five LCD control lines.
#define LCD_RD  2  //LED
#define LCD_WR  4
#define LCD_RS 15  //hard-wired to A2 (GPIO35) 
#define LCD_CS 33  //hard-wired to A3 (GPIO34)
#define LCD_RST 32 //hard-wired to A4 (GPIO36)

// Data bus wiring: LCD data bit N is carried by ESP32 GPIO number LCD_DN.
#define LCD_D0 12
#define LCD_D1 13
#define LCD_D2 26
#define LCD_D3 25
#define LCD_D4 17
#define LCD_D5 16
#define LCD_D6 27
#define LCD_D7 14

// Port/pin aliases for the five control lines, in the (port, pin) form the
// generic RD_/WR_/CD_/CS_/RESET_ macros expect.
#define RD_PORT GPIO.out
#define RD_PIN  LCD_RD
#define WR_PORT GPIO.out
#define WR_PIN  LCD_WR
#define CD_PORT GPIO.out
#define CD_PIN  LCD_RS
#define CS_PORT GPIO.out1.val
#define CS_PIN  LCD_CS
#define RESET_PORT GPIO.out1.val
#define RESET_PIN  LCD_RST

// Scatter a bus byte onto GPIO bit positions: bit N of d ends up at bit
// LCD_DN of the result.  Bits 8 and above of d do not contribute.
static inline uint32_t map_8(uint32_t d)
{
    uint32_t gpio_bits = 0;
    gpio_bits |= ((d >> 0) & 1u) << LCD_D0;
    gpio_bits |= ((d >> 1) & 1u) << LCD_D1;
    gpio_bits |= ((d >> 2) & 1u) << LCD_D2;
    gpio_bits |= ((d >> 3) & 1u) << LCD_D3;
    gpio_bits |= ((d >> 4) & 1u) << LCD_D4;
    gpio_bits |= ((d >> 5) & 1u) << LCD_D5;
    gpio_bits |= ((d >> 6) & 1u) << LCD_D6;
    gpio_bits |= ((d >> 7) & 1u) << LCD_D7;
    return gpio_bits;
}

// Inverse of map_8: gather the eight data-pin bits of a 32-bit GPIO word back
// into one byte, bit LCD_DN of d becoming bit N of the result.  All other
// bits of d are ignored.
static inline uint8_t map_32(uint32_t d)
{
    uint32_t bus_byte = 0;
    bus_byte |= ((d >> LCD_D0) & 1u) << 0;
    bus_byte |= ((d >> LCD_D1) & 1u) << 1;
    bus_byte |= ((d >> LCD_D2) & 1u) << 2;
    bus_byte |= ((d >> LCD_D3) & 1u) << 3;
    bus_byte |= ((d >> LCD_D4) & 1u) << 4;
    bus_byte |= ((d >> LCD_D5) & 1u) << 5;
    bus_byte |= ((d >> LCD_D6) & 1u) << 6;
    bus_byte |= ((d >> LCD_D7) & 1u) << 7;
    return static_cast<uint8_t>(bus_byte);
}

// Drive the LCD data bus with the low byte of `data`: every data pin is first
// written to the out_w1tc register (write-1-to-clear, going by its name) and
// the pins whose bit is set in `data` are then written to out_w1ts.
static inline void write_8(uint16_t data)
{
    const uint32_t all_bus_pins = map_8(0xFF);  //could define once as DMASK
    const uint32_t high_pins = map_8(data);

    GPIO.out_w1tc = all_bus_pins;
    GPIO.out_w1ts = high_pins;
}

// Sample GPIO.in once and return the byte currently present on the eight
// LCD data pins.
static inline uint8_t read_8()
{
    const uint32_t pin_levels = GPIO.in;
    return map_32(pin_levels);
}
// Turn all eight LCD data pins into outputs, in D0..D7 order.
static void setWriteDir()
{
    const uint8_t bus_pins[] = {
        LCD_D0, LCD_D1, LCD_D2, LCD_D3, LCD_D4, LCD_D5, LCD_D6, LCD_D7
    };
    for (uint8_t i = 0; i < sizeof(bus_pins) / sizeof(bus_pins[0]); ++i) {
        pinMode(bus_pins[i], OUTPUT);
    }
}

// Turn all eight LCD data pins into inputs, in D0..D7 order.
static void setReadDir()
{
    const uint8_t bus_pins[] = {
        LCD_D0, LCD_D1, LCD_D2, LCD_D3, LCD_D4, LCD_D5, LCD_D6, LCD_D7
    };
    for (uint8_t i = 0; i < sizeof(bus_pins) / sizeof(bus_pins[0]); ++i) {
        pinMode(bus_pins[i], INPUT);
    }
}

// ESP32 adds no extra padding to either strobe.
#define WRITE_DELAY { }
#define READ_DELAY  { }

// Bus transfer primitives for the ESP32 wiring.
// write8: put the byte on the data pins, then pulse WR.
#define write8(x)     { write_8(x); WRITE_DELAY; WR_STROBE; }
// write16: high byte first, then low byte.  Declares locals h and l, so the
// argument expression must not itself use those names.
#define write16(x)    { uint8_t h = (x)>>8, l = x; write8(h); write8(l); }
// READ_8: assert RD, sample the bus into dst, release RD.
#define READ_8(dst)   { RD_STROBE; READ_DELAY; dst = read_8(); RD_IDLE; }
// READ_16: two byte reads, first byte becomes the high byte of dst.
#define READ_16(dst)  { uint8_t hi; READ_8(hi); READ_8(dst); dst |= (hi << 8); }

// Control pins go through the Arduino API on ESP32; the port argument p is
// accepted for interface compatibility but not used.
#define PIN_LOW(p, b)        (digitalWrite(b, LOW))
#define PIN_HIGH(p, b)       (digitalWrite(b, HIGH))
#define PIN_OUTPUT(p, b)     (pinMode(b, OUTPUT))

#else
// None of the MCU branches of the enclosing chain matched.
#error MCU unsupported
#endif                          // regular UNO shields on Arduino boards

#endif                          //!defined(USE_SPECIAL) || defined (USE_SPECIAL_FAIL)

// Per-signal aliases built on the PIN_LOW / PIN_HIGH / PIN_OUTPUT primitives
// and the <signal>_PORT / <signal>_PIN pairs chosen for the target board.
// For every control line the "active" (or COMMAND) state drives the pin low
// and the "idle" (or DATA) state drives it high.
#define RD_ACTIVE  PIN_LOW(RD_PORT, RD_PIN)
#define RD_IDLE    PIN_HIGH(RD_PORT, RD_PIN)
#define RD_OUTPUT  PIN_OUTPUT(RD_PORT, RD_PIN)
#define WR_ACTIVE  PIN_LOW(WR_PORT, WR_PIN)
#define WR_IDLE    PIN_HIGH(WR_PORT, WR_PIN)
#define WR_OUTPUT  PIN_OUTPUT(WR_PORT, WR_PIN)
// CD low selects a command transfer, CD high a data transfer.
#define CD_COMMAND PIN_LOW(CD_PORT, CD_PIN)
#define CD_DATA    PIN_HIGH(CD_PORT, CD_PIN)
#define CD_OUTPUT  PIN_OUTPUT(CD_PORT, CD_PIN)
#define CS_ACTIVE  PIN_LOW(CS_PORT, CS_PIN)
#define CS_IDLE    PIN_HIGH(CS_PORT, CS_PIN)
#define CS_OUTPUT  PIN_OUTPUT(CS_PORT, CS_PIN)
#define RESET_ACTIVE  PIN_LOW(RESET_PORT, RESET_PIN)
#define RESET_IDLE    PIN_HIGH(RESET_PORT, RESET_PIN)
#define RESET_OUTPUT  PIN_OUTPUT(RESET_PORT, RESET_PIN)

 // General macros.   IOCLR registers are 1 cycle when optimised.
// WR_STROBE: one low pulse on WR (drive low, then immediately high again).
#define WR_STROBE { WR_ACTIVE; WR_IDLE; }       //PWLW=TWRL=50ns
// RD_STROBE: a comma expression that sets RD high once and then drives it low
// three times in a row; it leaves RD low, so the caller must issue RD_IDLE
// after sampling the bus.  PIN_LOW / PIN_HIGH therefore have to be usable as
// expressions for this macro to compile.
#define RD_STROBE RD_IDLE, RD_ACTIVE, RD_ACTIVE, RD_ACTIVE      //PWLR=TRDL=150ns, tDDR=100ns

// Targets that need no clock/port preparation get an empty GPIO_INIT().
#if !defined(GPIO_INIT)
#define GPIO_INIT()
#endif
// CTL_INIT: run the target's GPIO_INIT, then make all five control pins outputs.
#define CTL_INIT()   { GPIO_INIT(); RD_OUTPUT; WR_OUTPUT; CD_OUTPUT; CS_OUTPUT; RESET_OUTPUT; }
// WriteCmd: send a 16-bit value with CD low, then return CD to the data state.
#define WriteCmd(x)  { CD_COMMAND; write16(x); CD_DATA; }
// WriteData: send a 16-bit value without touching CD.
#define WriteData(x) { write16(x); }

I just cannot work out why the bottom half of the screen is not clearing or filling with colours.
Does anyone have any ideas?
Thanks in advance!

Please do NOT copy-paste the complete library files.
Just quote the file by name. And copy-paste your "changes" e.g.

I edited the mcufriend_special.h as such:

#define USE_MEGA_16BIT_SHIELD     // 2.14sec Mega2560 Shield

and the mcufriend_shield.h as below:

#define USE_SPECIAL             //check for custom drivers

and edited graphictest_kbv.ino setup()

    tft.begin(0x9488);

Then I can replicate what you are doing. When you paste large amounts of text, I have to compare every line with the library code just to see what you might have changed.

Confession. I am not prepared to do this work when you could have given the information.

Do you possess the anti-static bag that the display was packed in?
Please read the label. Quote the part number etc.

David.