A bit of Googling and I found an answer for at least an approximation. And hey, isn't most stuff an approximation? This test sketch:
// Upper bound of the loop counter in each of the three timed loops.
const unsigned long ITERATIONS = 50000UL;
// When true, each loop pass also prints n and the value computed for it.
const boolean SHOW_RESULTS = false;
// Prints one result line in the form "n = <n> n / 10 = <result>".
static void printResult (unsigned long n, unsigned long result)
{
  Serial.print ("n = ");
  Serial.print (n, DEC);
  Serial.print (" n / 10 = ");
  Serial.println (result, DEC);
}

// Prints "time taken = " followed by the milliseconds elapsed since `began`.
// millis () is read only after the label has been printed, so the reported
// figure includes the time spent in that print call.
static void printElapsed (unsigned long began)
{
  Serial.print ("time taken = ");
  Serial.println (millis () - began, DEC);
}

// Times three ways of computing n / 10 for n in [0, ITERATIONS) and prints the
// elapsed milliseconds of each over Serial at 115200 baud:
//   1. n * 102 / 1024 with the multiply built from shifts and adds,
//   2. n * 102 / 1024 with a compiler-generated multiply,
//   3. a plain n / 10.
// Results are declared volatile, presumably so the compiler cannot discard the
// otherwise unused calculations - TODO confirm against the generated code.
void setup ()
{
  Serial.begin (115200);
  Serial.println ();

  // Method 1: 102 = 64 + 32 + 4 + 2, so build n * 102 from shifted copies of n.
  Serial.println ("By shifting:");
  unsigned long began = millis ();
  for (unsigned long i = 0; i < ITERATIONS; ++i)
  {
    volatile unsigned long x2 = i << 1;                 // i * 2
    volatile unsigned long x4 = x2 << 1;                // i * 4
    volatile unsigned long x32 = x4 << 3;               // i * 32
    volatile unsigned long x64 = x32 << 1;              // i * 64
    volatile unsigned long x102 = x64 + x32 + x4 + x2;  // i * 102
    volatile unsigned long approx = x102 >> 10;         // i * 102 / 1024
    if (SHOW_RESULTS)
      printResult (i, approx);
  }
  printElapsed (began);

  // Method 2: same approximation, but the compiler chooses how to multiply.
  Serial.println ("By multiplying and then shifting:");
  began = millis ();
  for (unsigned long i = 0; i < ITERATIONS; ++i)
  {
    volatile unsigned long approx = (i * 102) >> 10;    // i * 102 / 1024
    if (SHOW_RESULTS)
      printResult (i, approx);
  }
  printElapsed (began);

  // Method 3: exact integer division, used as the reference.
  Serial.println ("By dividing:");
  began = millis ();
  for (unsigned long i = 0; i < ITERATIONS; ++i)
  {
    volatile unsigned long quotient = i / 10;
    if (SHOW_RESULTS)
      printResult (i, quotient);
  }
  printElapsed (began);
} // end setup
// Intentionally empty: all of this sketch's work is done in setup ().
void loop ()
{
}
As written, I get this:
By shifting:
time taken = 771
By multiplying and then shifting:
time taken = 294
By dividing:
time taken = 1956
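So, the basic method is to multiply by 102 and then shift right 10 bits (i.e. divide by 1024). This is an approximation in the sense that we want to multiply by 0.1 but are actually multiplying by 102/1024 (0.09961), so results can come out slightly low: for example, n = 10 gives 0 rather than 1, because 1020 is less than 1024.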
I tried three ways: the first was to multiply by 102 by doing assorted shifts and adds, the second was to let the compiler do the multiplication, and the third was to let the compiler do the division.
The figures show that the division was the slowest (1956 ms), my shifting idea second slowest (771 ms) and the multiply-then-shift fastest (294 ms). In fact the multiply-then-shift is over 6 times as fast as the division, if you can stand the loss of precision.
Obviously this is only really useful if you know the numbers in advance. It's also interesting that my attempt to second-guess the compiler and do a "more efficient" multiply by 102 was actually quite a lot slower than what the compiler generated, left to its own devices.