View Single Post
Old 05-21-2004, 09:34 AM   #1
doctorow
Guru
doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.doctorow ought to be getting tired of karma fortunes by now.
 
doctorow's Avatar
 
Posts: 914
Karma: 3410461
Join Date: May 2004
Device: Kindle Touch
Google PageRank Checksum Algorithm

Update 2: The working PHP source is now available here!

Update: Added the missing switch table.


You probably know about Google's PageRank. PageRank is Google's indicator for the general importance of a page. To display PageRanks, you need Google's toolbar (browser plug-in for IE).

Well, there is also a way to display PageRanks without the toolbar. You can directly request the PageRank of domain.com with the following URL (without line breaks):

http://www.google.com/search?
client=navclient-auto&ch=0123456789&
features=Rank&
q=info:http://www.domain.com/

The key is the "ch" parameter, which passes a checksum of the URL to Google. The checksum for a given URL can only change when Google updates the Toolbar version, and the way it is calculated is not publicly known.

I was able to determine the underlying algorithm for calculating the checksum in Google's Toolbar 2.0.111:

Code:
;-----------------------------------------------------------------------
; GOOGLECHECK -- computes the Google Toolbar "ch" checksum over a byte
; buffer (32-bit x86, MASM/IDA-style listing, Intel operand order).
;
; Stack arguments (addressed through ebp; the routine ends in a plain
; `retn`, so it does not remove them from the stack itself):
;   [ebp+8]   url_offset  - address of the first byte to hash
;   [ebp+0Ch] url_length  - number of bytes to hash
;   [ebp+10h] magic_dword - starting value of the third state word
; Returns:   eax = 32-bit checksum
; Preserves: ebx, esi, edi (pushed in the prologue, popped at the end)
; Scratch:   eax, ecx, edx, flags
;
; State words:
;   ebx - first state word,  starts at 9E3779B9h
;   edi - second state word, starts at 9E3779B9h
;   esi - third state word,  starts at magic_dword; becomes the result
; ecx walks the input buffer.
;
; Shape of the algorithm:
;   1. While at least 12 bytes remain, add three little-endian dwords
;      (bytes 0..3 -> ebx, 4..7 -> edi, 8..11 -> esi) and run a nine-step
;      subtract/shift/xor mix.
;   2. Add the ORIGINAL length to esi, then fold the 0..11 leftover bytes
;      in through a fall-through switch.
;   3. Run the same nine-step mix once more and return the third word.
;
; NOTE(review): the constant, the 12-byte blocks and the shift sequence
; 13,8,13,12,16,5,3,10,15 look like Bob Jenkins' "lookup2" hash --
; confirm against the published reference before relying on that.
;-----------------------------------------------------------------------
GOOGLECHECK	proc near

; Locals (the 8 bytes are reserved by the two `push ecx` in the prologue)
var_8		= dword	ptr -8		; number of full 12-byte blocks still to process
var_4		= dword	ptr -4		; number of input bytes not yet consumed
; Arguments
url_offset	= dword	ptr  8
url_length	= dword	ptr  0Ch
magic_dword	= dword	ptr  10h

		push	ebp
		mov	ebp, esp
		; two pushes = 8 bytes of local space for var_8 / var_4;
		; the value of ecx that gets stored is irrelevant
		push	ecx
		push	ecx
		mov	eax, [ebp+url_length]
		; flags from this compare are consumed by the `jb` further down;
		; the push/mov instructions in between do not modify flags
		cmp	eax, 0Ch
		push	ebx
		push	esi
		mov	esi, [ebp+magic_dword] ; third state word; the poster notes the caller passes 0xE6359A60
		push	edi
		mov	edi, 9E3779B9h	; second state word; derived from the golden ratio (the original note points at TEA)
		mov	ebx, edi	; first state word starts at the same constant
		mov	[ebp+var_4], eax	; remaining = url_length
		jb	jump_1		; fewer than 12 bytes: skip the block loop entirely
		; var_8 = url_length / 12 (unsigned; edx zeroed as the high half of the dividend)
		push	0Ch
		pop	ecx
		xor	edx, edx
		div	ecx
		mov	ecx, [ebp+url_offset]	; ecx = read pointer
		mov	[ebp+var_8], eax

; ---- one iteration per full 12-byte block ------------------------------
loop_1:
		; edi += little-endian dword built from bytes 4..7
		movzx	eax, byte ptr [ecx+7]
		movzx	edx, byte ptr [ecx+6]
		shl	eax, 8
		add	eax, edx
		movzx	edx, byte ptr [ecx+5]
		shl	eax, 8
		add	eax, edx
		movzx	edx, byte ptr [ecx+4]
		add	edx, edi
		shl	eax, 8
		lea	edi, [edx+eax]
		; esi += little-endian dword built from bytes 8..11
		movzx	eax, byte ptr [ecx+0Bh]
		movzx	edx, byte ptr [ecx+0Ah]
		shl	eax, 8
		add	eax, edx
		movzx	edx, byte ptr [ecx+9]
		shl	eax, 8
		add	eax, edx
		movzx	edx, byte ptr [ecx+8]
		add	edx, esi
		shl	eax, 8
		lea	esi, [edx+eax]
		; edx = little-endian dword built from bytes 0..3
		; (edx stands in for the first state word until it is copied to ebx below)
		movzx	edx, byte ptr [ecx+3]
		movzx	eax, byte ptr [ecx+2]
		shl	edx, 8
		add	edx, eax
		movzx	eax, byte ptr [ecx+1]
		shl	edx, 8
		add	edx, eax
		movzx	eax, byte ptr [ecx]
		shl	edx, 8
		add	edx, eax
		; mix 1: edx = (edx + ebx - edi - esi) xor (esi >> 13)
		sub	edx, edi
		sub	edx, esi
		mov	eax, esi
		shr	eax, 0Dh
		add	edx, ebx
		xor	edx, eax
		; mix 2: edi = (edi - edx - esi) xor (edx << 8)
		sub	edi, edx
		sub	edi, esi
		mov	eax, edx
		shl	eax, 8
		xor	edi, eax
		; mix 3: esi = (esi - edi - edx) xor (edi >> 13)
		sub	esi, edi
		sub	esi, edx
		mov	eax, edi
		shr	eax, 0Dh
		xor	esi, eax
		; mix 4: edx = (edx - edi - esi) xor (esi >> 12)
		sub	edx, edi
		sub	edx, esi
		mov	eax, esi
		shr	eax, 0Ch
		xor	edx, eax
		; mix 5: edi = (edi - edx - esi) xor (edx << 16)
		sub	edi, edx
		sub	edi, esi
		mov	eax, edx
		shl	eax, 10h
		xor	edi, eax
		; mix 6: esi = (esi - edi - edx) xor (edi >> 5)
		; (the var_4 update is interleaved here: remaining -= 12)
		sub	esi, edi
		sub	[ebp+var_4], 0Ch
		sub	esi, edx
		mov	eax, edi
		shr	eax, 5
		xor	esi, eax
		; mix 7: edx = (edx - edi - esi) xor (esi >> 3), then stored back into ebx
		sub	edx, edi
		mov	eax, esi
		shr	eax, 3
		sub	edx, esi
		xor	edx, eax
		mov	ebx, edx
		; mix 8: edi = (edi - ebx - esi) xor (ebx << 10)
		sub	edi, ebx
		sub	edi, esi
		mov	eax, ebx
		shl	eax, 0Ah
		xor	edi, eax
		; mix 9: esi = (esi - edi - ebx) xor (edi >> 15)
		sub	esi, edi
		mov	eax, edi
		sub	esi, ebx
		shr	eax, 0Fh
		xor	esi, eax
		; advance to the next block and count it off
		add	ecx, 0Ch
		dec	[ebp+var_8]
		jnz	loop_1
		jmp	short jump_2	; ecx already points at the leftover bytes

; ---- short input (< 12 bytes): the loop was skipped, so load the pointer here
jump_1:
		mov	ecx, [ebp+url_offset]

; ---- tail: fold in the total length and the 0..11 leftover bytes --------
jump_2:
		add	esi, [ebp+url_length]	; the full original length, not the remainder
		mov	eax, [ebp+var_4]	; eax = leftover byte count (0..11)
		; switch index = leftover - 1. With 0 leftover bytes eax wraps to
		; 0FFFFFFFFh, which the unsigned `ja` sends to the default case.
		dec	eax
		cmp	eax, 0Ah	; switch 11 cases
		ja	defaultswitch	; default
		jmp	ds:off_100307EA[eax*4] ; switch	jump

; Cases fall through downwards unless a jmp says otherwise. Bytes 8..10 go
; into bits 8..31 of esi; nothing is added to the low byte of esi here.
switch_10:
		movzx	eax, byte ptr [ecx+0Ah]	; case 0xA (11 bytes left): esi += byte[10] << 24
		shl	eax, 18h
		add	esi, eax

switch_9:
		movzx	eax, byte ptr [ecx+9] ;	case 0x9 (10 bytes left): esi += byte[9] << 16
		shl	eax, 10h
		add	esi, eax

switch_8:
		movzx	eax, byte ptr [ecx+8] ;	case 0x8 (9 bytes left): esi += byte[8] << 8
		shl	eax, 8
		add	esi, eax

switch_7:
		; case 0x7 (8 bytes left): edi += little-endian dword from bytes 4..7
		movzx	eax, byte ptr [ecx+7] ;	case 0x7
		movzx	edx, byte ptr [ecx+6]
		shl	eax, 8
		add	eax, edx
		movzx	edx, byte ptr [ecx+5]
		shl	eax, 8
		add	eax, edx
		movzx	edx, byte ptr [ecx+4]
		shl	eax, 8
		add	edx, edi
		lea	edi, [edx+eax]
		jmp	short switch_3	; case 0x3 -- skip the partial-dword cases 6..4

switch_6:
		movzx	eax, byte ptr [ecx+6] ;	case 0x6 (7 bytes left): edi += byte[6] << 16
		shl	eax, 10h
		add	edi, eax

switch_5:
		movzx	eax, byte ptr [ecx+5] ;	case 0x5 (6 bytes left): edi += byte[5] << 8
		shl	eax, 8
		add	edi, eax

switch_4:
		movzx	eax, byte ptr [ecx+4] ;	case 0x4 (5 bytes left): edi += byte[4]
		add	edi, eax

switch_3:
		; case 0x3 (4 bytes left): ebx += little-endian dword from bytes 0..3.
		; ecx is overwritten with byte[0] here; the pointer is not needed afterwards.
		movzx	eax, byte ptr [ecx+3] ;	case 0x3
		movzx	edx, byte ptr [ecx+2]
		shl	eax, 8
		add	eax, edx
		movzx	edx, byte ptr [ecx+1]
		movzx	ecx, byte ptr [ecx]
		shl	eax, 8
		add	eax, edx
		shl	eax, 8
		add	ecx, ebx
		lea	ebx, [ecx+eax]
		jmp	short defaultswitch ; default -- skip the partial-dword cases 2..0

switch_2:
		movzx	eax, byte ptr [ecx+2] ;	case 0x2 (3 bytes left): ebx += byte[2] << 16
		shl	eax, 10h
		add	ebx, eax

switch_1:
		movzx	eax, byte ptr [ecx+1] ;	case 0x1 (2 bytes left): ebx += byte[1] << 8
		shl	eax, 8
		add	ebx, eax

switch_0:
		movzx	eax, byte ptr [ecx] ; case 0x0 (1 byte left): ebx += byte[0]
		add	ebx, eax

; ---- final mix: same nine steps as in the loop, applied to ebx/edi/esi ---
defaultswitch:
		; mix 1: ebx = (ebx - edi - esi) xor (esi >> 13)
		sub	ebx, edi	; default
		sub	ebx, esi
		mov	eax, esi
		shr	eax, 0Dh
		xor	ebx, eax
		; mix 2: edi = (edi - ebx - esi) xor (ebx << 8)
		sub	edi, ebx
		sub	edi, esi
		mov	eax, ebx
		shl	eax, 8
		xor	edi, eax
		; mix 3: esi = (esi - edi - ebx) xor (edi >> 13)
		sub	esi, edi
		sub	esi, ebx
		mov	eax, edi
		shr	eax, 0Dh
		xor	esi, eax
		; mix 4: ebx = (ebx - edi - esi) xor (esi >> 12)
		sub	ebx, edi
		sub	ebx, esi
		mov	eax, esi
		shr	eax, 0Ch
		xor	ebx, eax
		; mix 5: edi = (edi - ebx - esi) xor (ebx << 16)
		sub	edi, ebx
		sub	edi, esi
		mov	eax, ebx
		shl	eax, 10h
		xor	edi, eax
		; mix 6: esi = (esi - edi - ebx) xor (edi >> 5)
		sub	esi, edi
		mov	eax, edi
		sub	esi, ebx
		shr	eax, 5
		xor	esi, eax
		; mix 7: ebx = (ebx - edi - esi) xor (esi >> 3)
		; from here on eax carries the third state word (copied from esi),
		; so the result ends up in the return register
		sub	ebx, edi
		mov	eax, esi
		mov	ecx, eax
		sub	ebx, eax
		shr	ecx, 3
		xor	ebx, ecx
		; mix 8: edi = (edi - ebx - eax) xor (ebx << 10)
		sub	edi, ebx
		sub	edi, eax
		mov	ecx, ebx
		shl	ecx, 0Ah
		xor	edi, ecx
		; mix 9: eax = (eax - edi - ebx) xor (edi >> 15) -> return value
		sub	eax, edi
		sub	eax, ebx
		shr	edi, 0Fh
		xor	eax, edi
		; restore the saved registers in reverse push order; `leave` drops the locals
		pop	edi
		pop	esi
		pop	ebx
		leave
		retn
GOOGLECHECK	endp

; Switch table
; Jump table for the tail switch in GOOGLECHECK, read by
; `jmp ds:off_100307EA[eax*4]`. The index is (leftover byte count - 1),
; so entry 0 handles 1 leftover byte and entry 10 handles 11.
; The entries must stay in this order: each is one dword, selected by index.
off_100307EA	
		dd offset switch_0
		dd offset switch_1
		dd offset switch_2
		dd offset switch_3
		dd offset switch_4
		dd offset switch_5
		dd offset switch_6
		dd offset switch_7
		dd offset switch_8
		dd offset switch_9
		dd offset switch_10
When the procedure returns, eax holds the 32-bit checksum (usually written in hex).
Now my question: Can anyone rewrite this code snippet into PHP?

Andy

Last edited by doctorow; 06-27-2004 at 08:57 AM.
doctorow is offline   Reply With Quote