|
|
View Full Version : Google PageRank Checksum Algorithm
doctorow 05-21-2004, 10:34 AM Update 2: The working PHP source is now available here (http://www.mobileread.com/forums/showpost.php?p=7769&postcount=87)!
Update: Added the missing switch table.
You probably know about Google's PageRank. PageRank is Google's indicator for the general importance of a page. To display PageRanks, you need Google's toolbar (browser plug-in for IE).
Well, there is also a way to display PageRanks without the toolbar. You can request directly the PageRank of domain.com with the following url (without line breaks):
http://www.google.com/search?
client=navclient-auto&ch=0123456789&
features=Rank&
q=info:http://www.domain.com/
The key is the parameter "ch", which transfers a checksum for the URL to Google, whereby this checksum can only change when the Toolbar version is updated by Google. This checksum is also not publicly known.
I was able to determine the underlying algorithm for calculating the checksum in Google's Toolbar 2.0.111:
; GOOGLECHECK - hash a byte buffer into a 32-bit value, returned in eax.
;
; Stack arguments (read relative to ebp after the prologue):
;   [ebp+8]   url_offset  - pointer to the bytes to hash
;   [ebp+0Ch] url_length  - number of bytes
;   [ebp+10h] magic_dword - initial value of the third accumulator
; The routine ends with a plain retn, so it does not pop its arguments.
; ebx, esi and edi are saved and restored; eax, ecx and edx are overwritten.
;
; Three 32-bit accumulators are kept in registers:
;   a = ebx (held in edx while the loop mixes), b = edi, c = esi
; a and b start at 9E3779B9h, c starts at magic_dword. Input is consumed in
; 12-byte blocks (three little-endian dwords added to a, b, c) followed by a
; nine-step subtract/shift/xor mix; the 0..11 leftover bytes are added by the
; switch below, c also receives url_length, and one final mix produces c.
; NOTE(review): the shift sequence 13,8,13,12,16,5,3,10,15 looks like mix()
; from Bob Jenkins' lookup2.c - confirm against that source.
GOOGLECHECK proc near
var_8 = dword ptr -8 ; local: number of whole 12-byte blocks still to process
var_4 = dword ptr -4 ; local: bytes not yet consumed (starts as url_length)
url_offset = dword ptr 8
url_length = dword ptr 0Ch
magic_dword = dword ptr 10h
push ebp
mov ebp, esp
push ecx ; the two pushes of ecx only reserve 8 bytes for var_4 / var_8
push ecx
mov eax, [ebp+url_length]
cmp eax, 0Ch ; flags from this compare are consumed by the jb below
push ebx
push esi
mov esi, [ebp+magic_dword] ; c = magic_dword (= 0xE6359A60 per the original note)
push edi
mov edi, 9E3779B9h ; b = 9E3779B9h; derived from the golden number, hi TEA ;)
mov ebx, edi ; a = b
mov [ebp+var_4], eax ; remaining = url_length
jb jump_1 ; fewer than 12 bytes: skip the block loop
push 0Ch
pop ecx ; ecx = 12
xor edx, edx ; clear the high half of the dividend for div
div ecx ; eax = url_length / 12
mov ecx, [ebp+url_offset] ; ecx = read pointer
mov [ebp+var_8], eax ; block counter
loop_1:
; b += little-endian dword at [ecx+4]
movzx eax, byte ptr [ecx+7]
movzx edx, byte ptr [ecx+6]
shl eax, 8
add eax, edx
movzx edx, byte ptr [ecx+5]
shl eax, 8
add eax, edx
movzx edx, byte ptr [ecx+4]
add edx, edi
shl eax, 8
lea edi, [edx+eax]
; c += little-endian dword at [ecx+8]
movzx eax, byte ptr [ecx+0Bh]
movzx edx, byte ptr [ecx+0Ah]
shl eax, 8
add eax, edx
movzx edx, byte ptr [ecx+9]
shl eax, 8
add eax, edx
movzx edx, byte ptr [ecx+8]
add edx, esi
shl eax, 8
lea esi, [edx+eax]
; edx = little-endian dword at [ecx+0]
movzx edx, byte ptr [ecx+3]
movzx eax, byte ptr [ecx+2]
shl edx, 8
add edx, eax
movzx eax, byte ptr [ecx+1]
shl edx, 8
add edx, eax
movzx eax, byte ptr [ecx]
shl edx, 8
add edx, eax
; mix step 1: a = (a + dword0) - b - c; a ^= c >> 13   (a lives in edx from here)
sub edx, edi
sub edx, esi
mov eax, esi
shr eax, 0Dh
add edx, ebx
xor edx, eax
; mix step 2: b -= a; b -= c; b ^= a << 8
sub edi, edx
sub edi, esi
mov eax, edx
shl eax, 8
xor edi, eax
; mix step 3: c -= b; c -= a; c ^= b >> 13
sub esi, edi
sub esi, edx
mov eax, edi
shr eax, 0Dh
xor esi, eax
; mix step 4: a -= b; a -= c; a ^= c >> 12
sub edx, edi
sub edx, esi
mov eax, esi
shr eax, 0Ch
xor edx, eax
; mix step 5: b -= a; b -= c; b ^= a << 16
sub edi, edx
sub edi, esi
mov eax, edx
shl eax, 10h
xor edi, eax
; mix step 6: c -= b; c -= a; c ^= b >> 5
sub esi, edi
sub [ebp+var_4], 0Ch ; remaining -= 12 (interleaved with the mix)
sub esi, edx
mov eax, edi
shr eax, 5
xor esi, eax
; mix step 7: a -= b; a -= c; a ^= c >> 3
sub edx, edi
mov eax, esi
shr eax, 3
sub edx, esi
xor edx, eax
mov ebx, edx ; a moves back into ebx
; mix step 8: b -= a; b -= c; b ^= a << 10
sub edi, ebx
sub edi, esi
mov eax, ebx
shl eax, 0Ah
xor edi, eax
; mix step 9: c -= b; c -= a; c ^= b >> 15
sub esi, edi
mov eax, edi
sub esi, ebx
shr eax, 0Fh
xor esi, eax
add ecx, 0Ch ; advance the read pointer to the next block
dec [ebp+var_8]
jnz loop_1
jmp short jump_2
jump_1:
mov ecx, [ebp+url_offset] ; short input: the read pointer was not loaded yet
jump_2:
add esi, [ebp+url_length] ; c += total length
mov eax, [ebp+var_4]
dec eax ; eax = remaining - 1, used as the jump-table index
cmp eax, 0Ah ; switch 11 cases (1..11 bytes remaining)
ja defaultswitch ; default: unsigned compare, so remaining == 0 also lands here
jmp ds:off_100307EA[eax*4] ; switch jump
; Tail handling. switch_N adds the byte at [ecx+N]; most cases fall through.
switch_10:
movzx eax, byte ptr [ecx+0Ah] ; case 0xA (11 bytes remaining): c += byte10 << 24
shl eax, 18h
add esi, eax
switch_9:
movzx eax, byte ptr [ecx+9] ; case 0x9: c += byte9 << 16
shl eax, 10h
add esi, eax
switch_8:
movzx eax, byte ptr [ecx+8] ; case 0x8: c += byte8 << 8 (the low byte of c is not used for data)
shl eax, 8
add esi, eax
switch_7:
movzx eax, byte ptr [ecx+7] ; case 0x7: b += little-endian dword at [ecx+4]
movzx edx, byte ptr [ecx+6]
shl eax, 8
add eax, edx
movzx edx, byte ptr [ecx+5]
shl eax, 8
add eax, edx
movzx edx, byte ptr [ecx+4]
shl eax, 8
add edx, edi
lea edi, [edx+eax]
jmp short switch_3 ; case 0x3: bytes 0..3 are all present, add them to a in one go
switch_6:
movzx eax, byte ptr [ecx+6] ; case 0x6: b += byte6 << 16
shl eax, 10h
add edi, eax
switch_5:
movzx eax, byte ptr [ecx+5] ; case 0x5: b += byte5 << 8
shl eax, 8
add edi, eax
switch_4:
movzx eax, byte ptr [ecx+4] ; case 0x4: b += byte4
add edi, eax
switch_3:
movzx eax, byte ptr [ecx+3] ; case 0x3: a += little-endian dword at [ecx]
movzx edx, byte ptr [ecx+2]
shl eax, 8
add eax, edx
movzx edx, byte ptr [ecx+1]
movzx ecx, byte ptr [ecx] ; ecx is reused for byte0; the pointer is not needed afterwards
shl eax, 8
add eax, edx
shl eax, 8
add ecx, ebx
lea ebx, [ecx+eax]
jmp short defaultswitch ; default
switch_2:
movzx eax, byte ptr [ecx+2] ; case 0x2: a += byte2 << 16
shl eax, 10h
add ebx, eax
switch_1:
movzx eax, byte ptr [ecx+1] ; case 0x1: a += byte1 << 8
shl eax, 8
add ebx, eax
switch_0:
movzx eax, byte ptr [ecx] ; case 0x0: a += byte0
add ebx, eax
; Final mix: the same nine steps as in the loop, with a kept in ebx throughout.
defaultswitch:
sub ebx, edi ; default; step 1: a -= b; a -= c; a ^= c >> 13
sub ebx, esi
mov eax, esi
shr eax, 0Dh
xor ebx, eax
sub edi, ebx ; step 2: b -= a; b -= c; b ^= a << 8
sub edi, esi
mov eax, ebx
shl eax, 8
xor edi, eax
sub esi, edi ; step 3: c -= b; c -= a; c ^= b >> 13
sub esi, ebx
mov eax, edi
shr eax, 0Dh
xor esi, eax
sub ebx, edi ; step 4: a -= b; a -= c; a ^= c >> 12
sub ebx, esi
mov eax, esi
shr eax, 0Ch
xor ebx, eax
sub edi, ebx ; step 5: b -= a; b -= c; b ^= a << 16
sub edi, esi
mov eax, ebx
shl eax, 10h
xor edi, eax
sub esi, edi ; step 6: c -= b; c -= a; c ^= b >> 5
mov eax, edi
sub esi, ebx
shr eax, 5
xor esi, eax
sub ebx, edi ; step 7: a -= b; a -= c; a ^= c >> 3
mov eax, esi ; from here on c is carried in eax
mov ecx, eax
sub ebx, eax
shr ecx, 3
xor ebx, ecx
sub edi, ebx ; step 8: b -= a; b -= c; b ^= a << 10
sub edi, eax
mov ecx, ebx
shl ecx, 0Ah
xor edi, ecx
sub eax, edi ; step 9: c -= b; c -= a; c ^= b >> 15
sub eax, ebx
shr edi, 0Fh ; b is shifted in place; it is not read again
xor eax, edi ; eax = final c = return value
pop edi
pop esi
pop ebx
leave ; restores esp/ebp, discarding var_4 and var_8
retn
GOOGLECHECK endp
; Switch table for the tail dispatch in GOOGLECHECK
; (jmp ds:off_100307EA[eax*4], with eax = remaining bytes - 1).
; Entry N points at switch_N, the case that starts by adding the byte at [ecx+N].
; The label shares a line with the first dd: MASM-style data labels are
; declared as "name dd ...", a bare name on its own line defines nothing.
; NOTE(review): the switch_N labels are defined inside the GOOGLECHECK proc;
; if the assembler scopes code labels to their procedure, this table has to
; sit between "proc" and "endp" (or the labels must be made public) - confirm.
off_100307EA dd offset switch_0 ; 1 byte remaining
dd offset switch_1 ; 2 bytes remaining
dd offset switch_2 ; 3
dd offset switch_3 ; 4
dd offset switch_4 ; 5
dd offset switch_5 ; 6
dd offset switch_6 ; 7
dd offset switch_7 ; 8
dd offset switch_8 ; 9
dd offset switch_9 ; 10
dd offset switch_10 ; 11 bytes remaining
At the end, eax holds the checksum in hex.
Now my question: Can anyone rewrite this code snippet into PHP?
Andy
doctorow 06-02-2004, 04:06 PM Somebody asked me the following, and I thought it better to show the answer to everyone:
Hi,
I was looking at the checksum algorithm for Pagerank and I am trying to figure it out myself.
My question is how do you get the value for these variables?
var_8
var_4
url_offset
url_length
magic_dword
var_8 and var_4 are local stack variables. They are uninitialized at the beginning of the algorithm function.
url_offset is a pointer to the URL of the web site we want to check the rank for.
url_length is the length of the URL of the web site we want to check the rank for.
magic_dword is a static 32bit word being passed to the function. As shown in the code, it is 0xE6359A60.
Unregistered 06-08-2004, 12:07 PM Would it be possible to use that assembly code to make a program in C and assembly to generate the checksums?
doctorow 06-08-2004, 02:22 PM Sure it would be. You can actually compile the assembler stub above as an .obj and then easily link it in any c application (I've been doing this already, actually).
But it would be much more interesting to have above's code in .php, allowing for various web applications!
palmar 06-10-2004, 07:59 AM Hello
Assembler told me:
undefined symbol : off_100307EA
Can you tell what is that identifier mean?
And how it should be declared, defined, set?
Thanks.
doctorow 06-12-2004, 02:56 PM off_100307ea is a jump table that I forgot to include. I will add the fixed code tomorrow.
Still, noone capable of converting ASM -> PHP?
seo-junior 06-13-2004, 02:12 PM Now my question: Can anyone rewrite this code snippet into PHP?
Still, noone capable of converting ASM -> PHP?
Somebody out there has released a PHP script for calculating Google PageRank checksums needed for the parameter "ch" within all queries of PageRanks, even without the Google Toolbar.
They say it has been tested with over 1.5 million different domains.
You can test this script at Google PageRank checksum calculation (http://pagerank-checksum.homelinux.com). There is some additional information available about this PHP script.
I've made several requests for my own domains, the computed checksums are all working properly.
Maybe this script is what you need.
Many regards!
Unregistered 06-17-2004, 10:05 PM >off_100307ea is a jump table that I forgot to include. I will add the fixed code tomorrow.
Could you post the table? I'll try to -> PHP it. (=
Unregistered 06-17-2004, 10:16 PM Also could you give me the memory address for that code you posted?
doctorow 06-22-2004, 06:51 AM I added the missing jump switch table (off_100307EA).
Unregistered 06-23-2004, 08:10 PM Is anyone seriously working towards porting this to PHP? How far have you got if you are? I am still documenting the ASM code, almost finished though. Might have a working PHP implementation finished this weekend. If anyone would like to discuss working together on porting this to PHP further, contact me at alex.stapleton@gmail.com please so we can swap information.
doctorow 06-24-2004, 03:30 AM Alex (I assume your name is Alex from your email),
please post and share your info here in this thread instead of e-mailing each other. Someone already messaged me and informed me that he has a working Perl code based on the code we posted here. I hope that he is also going to share it with us.
Unregistered 06-24-2004, 06:37 AM OK I will post what I know so far later today. I'm currently writing a PHP implementation of what appears to be a pretty simple hashing algorithm, I should be able to have a fully functioning PHP implementation working by Saturday or Sunday.
Unregistered 06-24-2004, 07:53 AM My work is availible here
http://meese.ath.cx/google/
I am working on those files directly so they will be updated as I make changes. The phps might be a bit behind the php but I will try and update it frequently.
These are pretty rough working notes for it, so don't expect much for a while, they may prove useful for someone else though.
doctorow 06-24-2004, 10:26 AM Thanks for the update! Cool idea to use $variables for the registers (eax, ...). ;)
Unregistered 06-24-2004, 10:55 AM I intend to reduce it down to something vaguely more understandable as I get to understand the algorithm better. But this way should at least produce the right output. The problem with doing it this way is that if i make a mistake it's going to a bitch to correct.
Unregistered 06-24-2004, 11:00 AM What decompiler did you use? Do you know what flavour assembly it was interpreting in?
--alex (i shoudl register probably)
Unregistered 06-24-2004, 11:01 AM ...AND what do you wan't it in PHP for anyway?
doctorow 06-24-2004, 11:16 AM Decompiler: IDA (Interactive Disassembler)
What do you mean by "flavour assembly"?
PHP can be used on a server, so we can check page's pageranks with a single click. Also, I hate seeing how people sell their "secret" script for hundreds of $$$, considering that Google could any time update their checksum algorithm with releasing a new toolbar.
doctorow 06-24-2004, 11:16 AM Btw, you really should register here! Looks better than this "Unregistered" thing... ;)
alexstapleton 06-24-2004, 05:42 PM consider me registered. a PHP extension could be made using just the ASM stuff you realise. although that might be complex as it would mean having to hunt down my VS.NET CDs
Unregistered 06-25-2004, 01:33 PM Erm. Slight problem. The resulting number appears to be 36 bits long according to the . Yet we only have 32 bit registers. However the first digit is always a 6 (first 4 bits are always 1110) so im guessing theres some extra stuff going on not included in that assembly code (unless i'm being stupid.) For now i'm just going to add 60129542144 (1110 with 32 0's after it) to it, which i'm hoping should work. however if you wouldn't mind investigating the bit which actually produces the URL to see if anything else is going on it might be useful.
alexstapleton 06-25-2004, 02:54 PM upon further investigation they do not all start with 1110. they are however all 36 bits long. this is weird isnt it?
doctorow 06-25-2004, 03:21 PM The assembler routine above is only the CORE of the algorithm used. When the routine returns, you must first convert the content of eax from hex to decimal. After that, you must PREPEND "ch=6" (used by the latest toolbar version) to the number. So if eax returns 0x12345678, which converted to decimal is 305419896, the final checksum is ch=6305419896.
doctorow 06-25-2004, 03:21 PM Erm. Slight problem. The resulting number appears to be 36 bits long according to the . Yet we only have 32 bit registers. However the first digit is always a 6 (first 4 bits are always 1110) so im guessing theres some extra stuff going on not included in that assembly code (unless i'm being stupid.) .
You are definitely not stupid ;) Great guesswork. See my post above!
alexstapleton 06-25-2004, 03:27 PM I think i've figured it out. Cunningly, they shuv a 6 on the start of the string. Problem solved.
doctorow 06-25-2004, 03:45 PM Cool!
Will you post the finished script here? Best is to use the [ PHP ] [/ PHP ] tags (without spaces) and put the code inbetween. I am sure those people selling the algorithm will be somewhat upset ;)
alexstapleton 06-25-2004, 05:01 PM lol its not ready yet! i think some code might be missing. can i talk to you on AIM or anything like that?
doctorow 06-25-2004, 05:18 PM I PM you my AIM... will be online for the next 45 mins or so.
alexstapleton 06-25-2004, 09:31 PM OK i've kinda hit a wall and I don't know whats wrong. I have implemented a single codepath for the algorithm. I have implemented everything for ciphertexts which are a multiple of 12 in length. I think the answer may be additional post processing which isn't in the ASM code but I can't be sure. If people would please examine my source to see if i'm missing anything (other than the large empty switch) I would be grateful.
alexstapleton 06-26-2004, 10:46 AM I think part of the problem is PHPs total lack of support for unsigned data types. Whilst I could write a bunch of UINT arithmatic functions, I don't think that is an elegant solution. So if you give me a while, I may write a PHP extension using the ASM code.
doctorow 06-26-2004, 10:55 AM Yes and no. I have been working on the script for the past hour and also noticed that the problem is PHP automatic type cast conversion. While UINT is not supported, forcing all the operations to (int) should do the trick as well. Lemme finish the editing and see what the result will be. Another small fix:
if($url_length > 12) {
should be
if($url_length >= 12) {
doctorow 06-26-2004, 10:59 AM Another small fix:
when you load the string chars to the $variables, we must convert the ascii value first using ord(), e.g.
$eax = ord($ecx{7+$ebp});// ;load 8th
$edx = ord($ecx{6+$ebp});// ;load 7th
UnknownZ 06-26-2004, 11:04 AM I hope I can get the PHP script too. I have been looking at google.com for these past 4 weeks and all of them require money in order to get the script. I found this thread actually from google.com. Please share it if it is done. Many many thanks in advance. :)
doctorow 06-26-2004, 11:28 AM Ok while normal operations work ok even with signed integers, bit shifting operations (>>) can really mess up when the integer is negative. Hmm. Stinks ;) Will think of something.
alexstapleton 06-26-2004, 01:01 PM I already did ord() conversion on the entire url. Theres a for loop that does it around line 40. PHP also has a tendency to convert the large values to floats. Which it shouldnt as the algorithm relies to some extent on overflow it seems. And er. It is >= on my script. Are you looking at googlehash.php? That has the full source of googlehash-test.php up to date in it.
alexstapleton 06-26-2004, 01:01 PM http://meese.ath.cx/google/googlehash.php
doctorow 06-26-2004, 01:05 PM ok... btw I think you should use e.g.
$eax = (int)($eax+$edx);
instead of
$eax = (int)$eax+$edx;
otherwise the typecast only applies to the left operand ($eax) of the addition, and the result could still overflow into a float.
doctorow 06-26-2004, 01:06 PM btw, i downloaded the update now and will look at it again.
doctorow 06-26-2004, 01:47 PM signed/unsigned in general is not a problem, except for the rightshift operator >> ... a right shift fills the left bits with whatever the original sign bit (leftmost bit) held... so if the number was negative, the bits will be all filled with 1's, but they should be filled with zeros instead (unsigned right shift).
doctorow 06-26-2004, 01:57 PM I found the needed snipped from a SHA1 implementation for PHP:
Use
$eax = (int)(zerofill($eax, 0x0c));
instead of
$eax = (int)($eax >> 0x0c);
and define the following function:
/**
 * Unsigned ("zero-fill") right shift of a 32-bit value.
 *
 * PHP's >> is an arithmetic shift and copies the sign bit into the vacated
 * positions. This helper shifts first and then clears every bit above the
 * (32 - $b) result bits, which gives the same answer whether PHP integers
 * are 32 or 64 bits wide. The previous version tested bit 31 explicitly and
 * only produced a zero-filled result when integers were exactly 32 bits.
 *
 * @param int $a value whose low 32 bits are shifted
 * @param int $b shift distance in bits
 * @return int   low 32 bits of $a shifted right by $b, zero-filled from the left
 */
function zeroFill($a, $b)
{
    if ($b >= 32) {
        // every one of the 32 bits is shifted out
        return 0;
    }
    if ($b <= 0) {
        // nothing to shift (the old code shifted by -1 here for negative $a)
        return $a;
    }
    // ~(-1 << n) has exactly the low n bits set and never leaves int range
    return ($a >> $b) & ~(-1 << (32 - $b));
}
alexstapleton 06-26-2004, 02:03 PM Updated with what you said. I've been running the Komodo debugger on it to check for floats and things, and I havent seen anything turn into a float with just (int) yet, but ill add the extra brackets anyway.
doctorow 06-26-2004, 02:08 PM Ok, it's working ;)
I will clean up the code a bit and then contact you, Alex!
alexstapleton 06-26-2004, 02:12 PM And why arent you on AIM?:P
doctorow 06-26-2004, 02:13 PM heh same reason like yesterday... my "better half"....
doctorow 06-26-2004, 02:29 PM attached file for alex
cyberax 06-27-2004, 12:03 AM Hi Guys,
I am new to this forum and was excited to see that people are really working together to find the PHP version of the google checksum algo.
I would also be part of this joint effort and would also love to see the working PHP version.
By far and large I know about the checksum calculation is that it is using Hash Function and instead of using the lengthy process if we could translate the Hash Function by Bob Jenkins at : http://burtleburtle.net/bob/c/lookup2.c to PHP it would really work good for us.
The Process:
GOOGLE_MAGIC = 0xe6359a60
The functions that would be required are: mix(a,b,c) and ub4 hash( k, length, initval)
would be called as: hash(url, strlen(url), GOOGLE_MAGIC));
We need to encode the url first.
The C version of the urlencode function is as follows:
/*
 * Copy the NUL-terminated string u into e, replacing '.', ':' and '/' with
 * their percent escapes ("%2E", "%3A", "%2F"). Every other character is
 * copied unchanged.
 *
 * The caller must provide room in e for up to 3 * strlen(u) + 1 bytes.
 * The result is NUL-terminated (the previous version left e unterminated).
 */
void urlencode(char *u, char *e) {
    for (; *u != '\0'; u++) {
        switch (*u) {
        case '.':
            *e++ = '%';
            *e++ = '2';
            *e++ = 'E';
            break;
        case ':':
            *e++ = '%';
            *e++ = '3';
            *e++ = 'A';
            break;
        case '/':
            *e++ = '%';
            *e++ = '2';
            *e++ = 'F';
            break;
        default:
            *e++ = *u;
            break;
        }
    }
    *e = '\0';
}
I am already working on a C version of the same. And I hope it would be complete soon.
I hope this will help.
Cheers,
CyberaX
cyberax 06-27-2004, 01:10 AM Someone who have earlier hacked the Google Toolbar suggested we must also add "info:" before hashing the URL
Cheers,
Cyberax
cyberax 06-27-2004, 02:09 AM If I am not wrong then please also try to check the CH value with the following string:
$url = 'info%3Awww%2Edomb%2Ecam';
please revert back on the same.
Regards,
By far and large I know about the checksum calculation is that it is using Hash Function and instead of using the lengthy process if we could translate the Hash Function by Bob Jenkins at : http://burtleburtle.net/bob/c/lookup2.c to PHP it would really work good for us.
I am already working on a C version of the same. And I hope it would be complete soon.
i also ran across that code when i search for the "0x9e3779b9" but couldn't get it to work before. then when i saw your post i took another hack at it and it works. no code needed changing, just needed to prepend 'info:'. encoding the url breaks it.
cyberax 06-27-2004, 05:10 AM Hey thats gr8 guys :cool:
You all are too good :crowngrin
cyberax 06-27-2004, 05:16 AM So now can anyone translate it into a full working PHP code and post it on this forum ?
Try to make the function something like GetGoogleCH("www.cyberax.net") which should return the 11 digit CH value.
Looking forward to see that code soon :cool
Cheers
Cyberax
doctorow 06-27-2004, 05:53 AM That is great news... the php script Alex and I were working on will work soon, but of course to have a clean c version of the hash function makes it easier and prettier at the end.
cyberax, how did you find out that the hash function is the one by this Bob Jenkins?
doctorow 06-27-2004, 05:56 AM i also ran across that code when i search for the "0x9e3779b9" but couldn't get it to work before. then when i saw your post i took another hack at it and it works. no code needed changing, just needed to prepend 'info:'. encoding the url breaks it.
Ahhh... perhaps I should have googled more ;)
alexstapleton 06-27-2004, 07:46 AM Is that code THE googlechecksum though? It is substanitally different from the google code if im reading it right. In particular the values used for shifting
alexstapleton 06-27-2004, 07:48 AM we still have the slight problem of it producing minus numbers when it shouldnt too. today i will work converting the ASM code to inline MASM assmebly for writing a PHP module
cyberax 06-27-2004, 07:57 AM Probably I had an intuition about the same :)
Cyberax
alexstapleton 06-27-2004, 07:58 AM do you want to post your C source for the hash function?
alexstapleton 06-27-2004, 08:14 AM thanks. i shal work on turning it into php right now.
cyberax 06-27-2004, 08:20 AM hi alex.. we are all behind you :)
doctorow 06-27-2004, 08:23 AM alex it shouldn't matter that we are working with "minus" numbers... it is simply another way of interpreting the left-most bit. The only problem we had in php was the rightshift operator, which filled empty spaces with 1's if the number was negative. That we fixed with the zerofill() function.
When we have the final result in $eax, we can use a simple function like printf() to convert it to an unsigned integer.
alexstapleton 06-27-2004, 08:23 AM Thanks loads for the C code. I should be finished in 10-20 minutes at most, although I may have to add fixes for unsigned integers again.
alexstapleton 06-27-2004, 08:36 AM Ok. Getting minus values still. Any suggestions? Code is availible here
http://meese.ath.cx/google/googlecheck3.php
alexstapleton 06-27-2004, 08:40 AM my aim username is 'alex w stapleton' without the speach marks of course.
alexstapleton 06-27-2004, 08:45 AM Im getting minus values. And their binary equivilents are wrong! Hence it doesnt work. Someone please examine my source.
doctorow 06-27-2004, 08:52 AM Try it with adding
sprintf("%u",$ch);
at the end!
Unregistered 06-27-2004, 08:53 AM Two comments:
1) The "initval" (or "level"-value) in the Jenkins hash-algorithm is probably not the GOOGLE_MAGIC-value since initval is supposed to be a 4-byte number.
2) The "info:" of the URL-string should probably not be url-encoded (that is, the :-sign shall not be replaced with %3A).
cyberax 06-27-2004, 08:59 AM Hi Alex / Doctorow ... how is it going ?
Lets do it guys.. then we will try to do something about Yahoo WebRank :)
alexstapleton 06-27-2004, 09:03 AM 0xE6359A60 is what the ASM code uses. (the magic value)
i fixed the info: thing
still gives wrong result with printf
alexstapleton 06-27-2004, 09:14 AM im gunna run a debugger over the C code to check where the values are different between versions
doctorow 06-27-2004, 09:16 AM alex: I cannot debug your code before tonight... will download the most current one and check it out later.
cyberax: didn't even know that yahoo had something similar ;)
alexstapleton 06-27-2004, 09:21 AM After the while loop:
PHP: $c = 671391756
C: c = 3884531380
spot the difference?
cyberax 06-27-2004, 09:24 AM In my opinion we need the Hash of "info%3Awww%2Eexample%2Ecom"
alexstapleton 06-27-2004, 09:25 AM WHOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO OO
i've done it! the urlencoded() string should NOT be sent to the hash function. silly me. time for code cleanup then release.
cyberax 06-27-2004, 09:25 AM url: info:http://www.example.com/
540747202
00100000001110110010010111000010
should be
540747202
00100000001110110010010111000010
Did you make it ?
doctorow 06-27-2004, 09:28 AM ;) ;) ;)
cyberax 06-27-2004, 09:28 AM WOW !!!
Cheers to Alex ;)
Alexander Turcic 06-27-2004, 09:38 AM doctorow, alex,
you guys don't mind when I bring your findings to the frontpage? It's some interesting work you two came up with!
cyberax 06-27-2004, 09:38 AM Hey.. Alex its Cyberax (Vijay Bhatter) :)
The site looks good ;)
alexstapleton 06-27-2004, 09:40 AM I'm just setting up a page to demonstrate it. Soon as my remote server wakes up i'll host it on that instead of my home machine. 5 mins or so till im ready.
doctorow 06-27-2004, 09:43 AM Alex, I really like your site. Sure go ahead and post about it!
alexstapleton 06-27-2004, 09:46 AM http://meese.ath.cx/google/
please dont link to this page its hosted on my ADSL line at home. I will make a mirror page on my datacenter server as soon as i can.
doctorow 06-27-2004, 09:49 AM alex, can you provide the source so that we can publish it now?
doctorow 06-27-2004, 09:49 AM Ah just saw it! Thanks!
alexstapleton 06-27-2004, 09:51 AM http://alex.vort-x.net/google/
you may link here.
cyberax 06-27-2004, 09:51 AM doctorow, alex,
you guys don't mind when I bring your findings to the frontpage? It's some interesting work you two came up with!
Seems like I was left out. Anyways...
Thanks to Alex and Doctorow for such a good work :)
doctorow 06-27-2004, 09:52 AM The Google Checksum Calculator, by Alex Stapleton, Andy Doctorow, Vijay "Cyberax" Bhatter, and a few others.
<?php
/*
This code is released unto the public domain
*/
// The script prints its result as plain text.
header("Content-Type: text/plain; charset=utf-8");
// Default for GoogleCH()'s $init argument: the starting value of the third hash accumulator ($c).
define('GOOGLE_MAGIC', 0xE6359A60);
/**
 * Unsigned ("zero-fill") shift right of a 32-bit value.
 *
 * PHP's >> copies the sign bit into the vacated positions. Shifting first
 * and then masking off everything above the (32 - $b) result bits gives a
 * zero-filled result on both 32-bit and 64-bit PHP builds; the previous
 * bit-31 test was only correct when integers were exactly 32 bits wide.
 *
 * @param int $a value whose low 32 bits are shifted
 * @param int $b number of bit positions
 * @return int   low 32 bits of $a shifted right by $b with zero fill
 */
function zeroFill($a, $b)
{
    if ($b >= 32) {
        return 0;       // all 32 bits shifted out
    }
    if ($b <= 0) {
        return $a;      // no shift requested; avoids the old shift by ($b - 1) = -1
    }
    $keep = ~(-1 << (32 - $b));   // low (32 - $b) bits set, built without leaving int range
    return ($a >> $b) & $keep;
}
/**
 * Mix three 32-bit accumulators (nine subtract / shift / xor steps).
 *
 * All arithmetic is meant to wrap at 32 bits. PHP integers may be wider, so
 * every intermediate result is reduced to its low 32 bits; without that the
 * values keep growing on 64-bit builds and the final hash is wrong.
 *
 * @param int $a first accumulator
 * @param int $b second accumulator
 * @param int $c third accumulator
 * @return array array($a, $b, $c) after mixing
 */
function mix($a,$b,$c) {
    // Low-32-bit mask. Where ints are only 32 bits wide, 0xFFFFFFFF is not an
    // int, so -1 (all bits set) is used and the native width does the wrapping.
    $m = PHP_INT_SIZE > 4 ? 0xFFFFFFFF : -1;
    $a &= $m; $b &= $m; $c &= $m;

    $a = ($a - $b - $c) & $m;  $a ^= zeroFill($c,13);
    $b = (($b - $c - $a) ^ ($a<<8)) & $m;
    $c = ($c - $a - $b) & $m;  $c ^= zeroFill($b,13);

    $a = ($a - $b - $c) & $m;  $a ^= zeroFill($c,12);
    $b = (($b - $c - $a) ^ ($a<<16)) & $m;
    $c = ($c - $a - $b) & $m;  $c ^= zeroFill($b,5);

    $a = ($a - $b - $c) & $m;  $a ^= zeroFill($c,3);
    $b = (($b - $c - $a) ^ ($a<<10)) & $m;
    $c = ($c - $a - $b) & $m;  $c ^= zeroFill($b,15);

    return array($a,$b,$c);
}
/**
 * Hash an array of byte values into the checksum number.
 *
 * @param array    $url    byte values (integers) of the string to hash
 * @param int|null $length number of bytes to hash; defaults to the size of $url
 * @param int      $init   starting value of the third accumulator
 * @return int third accumulator after the final mix()
 */
function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC) {
    if ($length === null) {
        $length = count($url);
    }

    $a = 0x9E3779B9;
    $b = 0x9E3779B9;
    $c = $init;

    // Whole 12-byte blocks: three little-endian 32-bit words, then a mix.
    $pos = 0;
    for ($remaining = $length; $remaining >= 12; $remaining -= 12) {
        $a += ($url[$pos+0] +($url[$pos+1]<<8) +($url[$pos+2]<<16) +($url[$pos+3]<<24));
        $b += ($url[$pos+4] +($url[$pos+5]<<8) +($url[$pos+6]<<16) +($url[$pos+7]<<24));
        $c += ($url[$pos+8] +($url[$pos+9]<<8) +($url[$pos+10]<<16)+($url[$pos+11]<<24));
        list($a, $b, $c) = mix($a, $b, $c);
        $pos += 12;
    }

    // The total length goes into the low byte of $c, so the trailing bytes
    // that belong to $c start one byte higher (<<8, <<16, <<24).
    $c += $length;
    switch ($remaining) {
        // every case deliberately falls through to the ones below it
        case 11: $c += ($url[$pos+10]<<24);
        case 10: $c += ($url[$pos+9]<<16);
        case 9 : $c += ($url[$pos+8]<<8);
        case 8 : $b += ($url[$pos+7]<<24);
        case 7 : $b += ($url[$pos+6]<<16);
        case 6 : $b += ($url[$pos+5]<<8);
        case 5 : $b += ($url[$pos+4]);
        case 4 : $a += ($url[$pos+3]<<24);
        case 3 : $a += ($url[$pos+2]<<16);
        case 2 : $a += ($url[$pos+1]<<8);
        case 1 : $a += ($url[$pos+0]);
    }

    $mixed = mix($a, $b, $c);
    return $mixed[2];
}
/**
 * Convert a string into an array of its byte values.
 *
 * @param string $string input string
 * @return array one integer (0-255) per byte, indexed from 0; empty for ''
 */
function strord($string) {
    $result = array();                      // so that '' yields array() rather than an undefined variable
    $count = strlen($string);
    for ($i = 0; $i < $count; $i++) {
        $result[$i] = ord($string[$i]);     // [] offsets: the {} form is a parse error in PHP 8
    }
    return $result;
}
// Reference value: http://www.example.com/ - Checksum: 6540747202
// The hash is taken over 'info:' followed by the raw (not url-encoded) address.
$url = 'info:' . $_GET['url'];
echo "url:\t" . $_GET['url'] . "\n";
$ch = GoogleCH(strord($url));
// The checksum is the digit 6 followed by the hash printed as an unsigned number.
echo sprintf("ch:\t6%u\n", $ch);
?>
alexstapleton 06-27-2004, 09:53 AM Mmmm messy. Should really of made it prettier. Oh well.
Alexander Turcic 06-27-2004, 09:54 AM cyberax, sorry about that ;) This thread just came to my attention and I didn't read every single post.
alexstapleton 06-27-2004, 10:13 AM http://forums.seochat.com/t12316/s.html
news is spreading fast it seems.
alexstapleton 06-27-2004, 10:17 AM ahhh its doctorow spamming SEO forums ;)
doctorow 06-27-2004, 10:22 AM alex dude, why would you suspect me?
;)
UnknownZ 06-27-2004, 11:01 AM Thank you for the code, it's working. Thanks man. You are all really great. :D
cyberax 06-27-2004, 11:03 AM Wow!!!
I did'nt knew that I had such a big secret in my wallet ;) Thanks to Gemini who told me abt the same.
Cheers
the c code works for me, but the php code is returning different results. for 'http://www.example.com/' i get 62147418112 on my powerbook (big endian) and 62920760850 on x86 linux (little endian).
alexstapleton 06-27-2004, 12:29 PM only works on little endian machines obviously. there is a big endian version of the hash function i believe. i will look into big endian compatibility.
doh! i forgot to prepend 'info:' so my results were also wrong for little endian. sorry, my bad.
alexstapleton 06-27-2004, 02:10 PM actually your result was right for little endian.
you can use http://alex.vort-x.net/google/ the form there to verify ch values if you want.
alexstapleton 06-27-2004, 02:11 PM erm what am i talking about. yes it was wrong. lol :S
http://www.example.com = 6540747202
Unregistered 06-27-2004, 06:15 PM I just developed a complete all-in-one function in PHP that using The Google Checksum Calculator, by Alex Stapleton, Andy Doctorow, Vijay "Cyberax" Bhatter, and a few others, (http://www.mobileread.com/forums/showpost.php?p=7769&postcount=87) and XMLize.php by Hans Anderson (http://www.hansanderson.com/php/xml/) automatically calculates the PageRank for a page. Try it at:
http://zenitram.th4y.com/pagerank
And the source code is at:
http://zenitram.th4y.com/pagerank/source.txt
Hope you like it ;)
Unregistered 06-27-2004, 07:51 PM actually your result was right for little endian.
you can use http://alex.vort-x.net/google/ the form there to verify ch values if you want.
Is that url supposed to be working? It doesnt for me anyway. Or do I have to put http:// infront of the url? Or maybe "link:"???
(tested it all, even urlencoded, still doesnt give correct ch)
UnknownZ 06-27-2004, 10:04 PM You need to put http://
for example if your website is www.unregistered.com , just put in http://www.unregistered.com ...it will give you correct checksum. :D
And thanks also to zenitram for the code. :D
cyberax 06-27-2004, 10:20 PM Good Work from Zenitram :cool. But you guys really do not know how much loss you have made for the guys who are charging money for this piece of code :)
- Cyberax
UnknownZ 06-27-2004, 10:29 PM They have earned a lot already. :D
Unregistered 06-28-2004, 03:23 AM Ok, I found the bugg why your page is giving wrong ch for me...
When I look at your url after it has been submitted, the value for "url" is like this:
http%3A%2F%2Fwww.example.com
BUT for me the toolbar priduces this:
http%3A%2F%2Fwww.example.com%2
For every request it puts %2 after the call... why? Is this an client thing? I got WinXP and running IE6.0, all patched and done.
=(
Unregistered 06-28-2004, 03:26 AM correction; that should be %2F, not %2
Unregistered 06-28-2004, 04:43 AM %2F is a slash, see http://www.asciitable.com/
Without a trailing slash it isn't a proper URI
Festiz 06-28-2004, 07:22 AM Alex & co's calculator:
url: http://www.example.com/
ch: 62863130722
Homelinux's calculator
url: http://www.example.com/
ch: 6540747202
Spot the huge difference?
I get different values for all domains I've tryed. Why? Wasn't your goal to create a free version of theirs?
alexstapleton 06-28-2004, 07:48 AM http://alex.vort-x.net/google/
It works.
url: http://www.example.com/
ch: 6540747202
See. Any problems your having are due to your server setup. Not the code. Post information on that and we might be able to help. PHP version, whether you modified the script or not, CPU, web server (if any.)
alexstapleton 06-28-2004, 07:51 AM I just developed a complete all-in-one function in PHP that using The Google Checksum Calculator, by Alex Stapleton, Andy Doctorow, Vijay "Cyberax" Bhatter, and a few others, (http://www.mobileread.com/forums/showpost.php?p=7769&postcount=87) and XMLize.php by Hans Anderson (http://www.hansanderson.com/php/xml/) automatically calculates the PageRank for a page. Try it at:
http://zenitram.th4y.com/pagerank
And the source code is at:
http://zenitram.th4y.com/pagerank/source.txt
Hope you like it ;)
iirc it breaks the google TOS and they can block you for using it?
One nice thing about this is that people won't be able to sell this code for $$$ anymore ;)
Festiz 06-28-2004, 09:58 AM http://alex.vort-x.net/google/
It works.
url: http://www.example.com/
ch: 6540747202
See. Any problems your having are due to your server setup. Not the code. Post information on that and we might be able to help. PHP version, whether you modified the script or not, CPU, web server (if any.)
Excuse me if I don't see your point, but I tested yours and the payable versions online so that's why I wondered. Because if you both get different ch's then Only one of the values should be able to use when retreiving the PR
ZeNiTRaM 06-28-2004, 11:59 AM i don't think they'll block me, there are 1000000s of requests for the Google Toolbar every day, and they can't determine what ones are from my script and what ones not.
However they can change the algorithm, but they need to update all the Google Toolbars out there before they can remove access via the actual algorithm
alexstapleton 06-28-2004, 01:34 PM they can block requests coming from zenithram.th4y.com. so it is perfectly possile, and easy, for them to block anyone that breaks their TOS.
they can change the algotithm, and all the toolbars automatically, transparently update. but what benefit would that give them?
alexstapleton 06-28-2004, 04:45 PM Festiz: If you get different results, then your doing something wrong. Because it works and produces the same values as the one at the homelinux address. There is a pagerank fetching form on my website now so you can test that the values work.
Unregistered 06-29-2004, 08:16 AM Updated by Alex and Bill Zeller.
<?php
// The script prints its result as plain text.
header("Content-Type: text/plain; charset=utf-8");
/*
* Written by Alex Stapleton
*
* With thanks to Vijay "Cyberax" Bhatter
* and Andy Doctorow. Probably wouldn't
* of been possible without their input.
*
* PHP code reduction and minor error
* checking by Bill Zeller
*
* This code is released into the public domain.
*/
/*
 * Unsigned ("zero-fill") right shift of $a by $b bits.
 *
 * PHP's >> is an arithmetic shift, so a value with bit 31 set is handled in
 * two steps: shift by one, clear bit 31, put the original bit 31 back in as
 * bit 30, then shift by the remaining $b-1 bits. In that branch $b must be
 * at least 1; mix() only ever passes 3, 5, 12, 13 or 15.
 */
function shr($a, $b){
    if (!(2147483648 & $a)) {
        // Bit 31 clear: the plain shift already fills with zeroes.
        return $a >> $b;
    }
    $firstStep = (($a >> 1) & ~2147483648) | 0x40000000;
    return $firstStep >> ($b - 1);
}
/*
 * Mixes the three-element state array $x in place (passed by reference;
 * nothing is returned).
 *
 * Three rounds are run. In each round, for j = 0, 1, 2 in that order:
 *   x[j] = (x[j] - x[(j+1)%3] - x[(j+2)%3]) XOR t
 * where t is x[0] shifted LEFT by the round's middle amount when j == 1,
 * and x[(j+2)%3] shifted right (via shr(), zero-filling) by the round's
 * j-th amount otherwise.
 */
function mix(&$x) {
// $a holds one row of shift amounts per round: (right, left, right).
for($a=array(array(13,8,13),array(12,16,5),array(3 ,10,15)),$i=0;$i<3;$i++)
// $d is computed in the loop's increment clause, i.e. after the j == 0 step
// has already updated $x[0], and is only read on the j == 1 step.
for($j=0;$j<3;$j++,$d=$x[0]<<($a[$i][1]))
$x[$j]=($x[$j]-$x[($j+1)%3]-$x[($j+2)%3])^($j==1?$d:shr($x[($j+2)%3],$a[$i][$j]));
}
/*
 * Computes the "ch" checksum string for a URL.
 *
 * 'info:' is prepended to $url here, so callers pass the bare URL. The
 * returned string already carries the leading "6" followed by the hash
 * formatted as an unsigned decimal, so callers must not add another "6".
 *
 * State: $x[0], $x[1] start at 0x9E3779B9 and $x[2] at 0xE6359A60.
 * Every full 12-byte block is added to the state as three little-endian
 * 32-bit words and then mixed. The total length is added to $x[2], the
 * remaining 0..11 bytes are folded in, and the state is mixed once more.
 *
 * Fix: in the tail, the low byte of $x[2] is reserved for the length, so
 * tail bytes 8, 9 and 10 must be shifted by 8, 16 and 24 bits (as in the
 * switch-based versions of this algorithm: "case 9: c += k[8] << 8").
 * The previous code shifted them by 0, 8 and 16, giving a wrong checksum
 * whenever the tail was 9 to 11 bytes long.
 */
function GoogleCH($url) {
    // unpack() returns a 1-based array; array_slice() re-indexes it from 0.
    $url = array_slice(unpack('c*', 'info:'.$url), 0);
    $x = array(0x9E3779B9, 0x9E3779B9, 0xE6359A60);

    // Full 12-byte blocks.
    for ($k = 0, $len = $length = sizeof($url); $len >= 12; $k += 12, $len -= 12) {
        $u = array_slice($url, $k);
        for ($i = 0; $i < 4; $i++)
            for ($j = 0; $j < 3; $j++)
                $x[$j] += $u[$j*4+$i] << (8*$i);
        mix($x);
    }

    // Tail: $len is now 0..11.
    $u = array_slice($url, $k);
    $x[2] += $length;
    for ($i = 0; $i < 4; $i++) {
        for ($j = 0; $j < 3; $j++) {
            if ($len > $j*4+$i) {
                // Word 2 skips its low byte, which holds the length.
                $shift = 8*$i + ($j == 2 ? 8 : 0);
                $x[$j] += $u[$j*4+$i] << $shift;
            }
        }
    }
    mix($x);

    return sprintf('6%u', $x[2]);
}
// Reference value: http://www.example.com/ - Checksum: 6540747202
//
// Driver: reads ?url=..., prints the checksum and the matching query URL.
// GoogleCH() already returns the checksum WITH its leading "6", so the link
// must use it verbatim; the earlier "ch=6$ch" produced a doubled "66...".
$url = isset($_GET['url']) ? $_GET['url'] : '';
if ($url === '') {
    // No URL supplied: say so instead of hashing an empty string.
    echo "usage: call this script with ?url=http://www.example.com/\n";
} else {
    echo "url:\t$url\n";
    $ch = GoogleCH($url);
    echo "ch:\t$ch\n";
    $url = 'info:'.urlencode($url);
    $link =
    "http://www.google.com/search?client=navclient-auto&ch=$ch&ie=UTF-8&oe=UTF-8&features=Rank&q=$url";
    print("\n\nPage Rank URL: $link");
}
?>
Unregistered 06-29-2004, 12:57 PM Convert PHP > Delphi is possible ?
uncopyrightable 06-29-2004, 02:45 PM I'm almost finished converting this algorithm to Delphi.
But there is a stupid mistake in my code that I can't find :(
Probably it is during unaccurate work with data types.
Before last mix(a,b,c) it returns correct b and c but wrong a.
If you wish I can publish my code here.
CU,
uncopyrightable
P.S: sorry for my English.
Unregistered 06-30-2004, 03:45 AM Windows tool based on above's code:
http://www.my-eldorado.net/GGChecksumRecovery.zip
Unregistered 06-30-2004, 04:40 AM Very good tools ! Thanks !!
uncopyrightable 06-30-2004, 07:25 AM Yep! I'm porting this code to Pascal.
My mistake was in incorrect understanding how switch statement without breaks works.
So, after code purifying Your desire may shall come :)
Unregistered 06-30-2004, 07:46 AM Pascal code here :
http://www.my-eldorado.net/Google-Checksum-Recovery-Source.php
uncopyrightable 06-30-2004, 08:13 AM Both pascal source and binary: http://uncopyrightable.front.ru/
synku 06-30-2004, 08:22 AM Hi Alex / Doctorow ... how is it going ?
Lets do it guys.. then we will try to do something about Yahoo WebRank :)
Seriously, I would be glad to work on an equivalent for Y! WebRank.
However it's only available in beta version of the Yahoo Toolbar, and public beta-testing is closed atm.
Anyone had more chance ?
privmsg me to send any files to analyze.
Unregistered 07-01-2004, 07:33 PM anyone translated it to perl / cgi?
yes i am a lazy guy but if one has done it allready i'll save some time :)
please post here!
Will.Spencer 07-06-2004, 09:49 PM The Google Checksum Calculator, by Alex Stapleton, Andy Doctorow, Vijay "Cyberax" Bhatter, and a few others.
In this (latest?) revision of the PHP program, the first character of the ch value appears to be clipped when the ch value is printed.
url: http://www.gsm-security.net/
ch: 61014109771
Page Rank URL: http://www.google.com/search?client=navclient-auto&ch=661014109771&ie=UTF-8&oe=UTF-8&features=Rank&q=info:http%3A%2F%2Fwww.gsm-security.net%2F
See how the printed value of ch starts "610..." and the ch value in the URL starts "6610..."?
Or is this just me?
Will.Spencer 07-06-2004, 09:59 PM These versions need version control. :D
i translated to perl, but it seems that the current implementation im using is only 32bit, and some of the integers ive used as a test are at least 33 bit, which means it doesnt work!
If anyones got a clever solution to do bitwise operations on these numbers, please let me know!
Jim
Unregistered 07-07-2004, 10:51 AM I am actually working on a perl-version, too, right now.
I ran into the problem you described with the first test ...
the overflowing with bigger numbers and bitwise operations seems not to work.
I actually don't have a clue about all these basical things, I'm doing cgi-scripts most of the time. Anyhow, enough excuses. I think the problem can be solved by not using the bitwise operators on the big ints but simply prepare the ints to behave like in php or bash: for example
# bitleft($first, $second): left-shift helper meant to stand in for
# "$first << $second" when the operands may exceed 2**32.
#
# - If either operand is greater than 2**32, 2**32 is subtracted from each
#   operand that exceeds it and the plain shift is returned.
# - Otherwise the shifted value minus 2**32 is returned, i.e. -(2**32 - r).
#
# Fix: the flag was declared as "my $special = 1 if (...)". perlsyn documents
# a "my" with a statement modifier as undefined behaviour, so the flag is now
# always initialised.
# NOTE(review): the comparisons use ">" rather than ">="; an operand of
# exactly 2**32 is therefore not reduced. Kept as in the original; confirm.
sub bitleft
{
    my ($first, $second) = @_;
    my $max = 4_294_967_296;    # 2**32

    # Must be evaluated before the operands are reduced below.
    my $special = ($first > $max || $second > $max) ? 1 : 0;

    $first  = $first  - $max if ($first  > $max);
    $second = $second - $max if ($second > $max);

    if ($special == 1)
    {
        return ($first << $second);
    }
    my $result = $first << $second;
    return -($max - $result);
}
where
$a << $b would be bitleft($a, $b);
it more or less works, but does not in all subs deal with signs correctly (eg. the xor-one has a problem with too big negative numbers). drop me a line at jan@delinquent.SPAMISBAD.de and I'll be happy to send you the code I got so far. remove the SPAMISBAD, of course ;)
Will.Spencer 07-07-2004, 12:40 PM Both pascal source and binary:
http://uncopyrightable.front.ru/
When I execute the binary from your site on my WinXP system, I receive the following error after I press <Get PR>:
"Access violation at address 00453B77 in module 'chpr.exe'. Read of address 00000000."
mhkay, obviously I really got a problem over here. Got most of the functions ready (but hey, it's ugly, it's really ugly...) but it can't be the right way. anyhow, gonna get them ready and testdrive then.
was trying to say, that I don't get it. I wrote functions that shall bitshift & stuff in perl behaving like php but obviously didn't make it, since they fail from time to time. Since I really don't have the theoretical background for all that, I hope someone else will have more success. the code I've written so far is available at http://www.delinquent.de/ch.txt . It's clean right now, so all my tests are out and it's ready to run. Start it, it tries to generate the checksum for http://www.delinquent.de/, which should be 63086170437 I think. review definetly needed, someone who actually understands more than I do will hopefully solve the issue with a couple of lines.
questions, comments and flames for bad code to jan@delinquent.de.invalid.
just leave the .invalid out when using it.
mrseo 07-15-2004, 08:36 AM Google sux !
Will.Spencer 07-17-2004, 06:05 PM If you're interested in Google PageRank, you've gotta check this out :
http://cryptopowah.online.fr
:wink:
Bahahahahahaha... people trying to sell free stuff is so funny.
Unregistered 07-19-2004, 03:50 PM Could anybody translate the algorithm into vb6 ??. I ll be very gratefull
Unregistered 07-22-2004, 02:33 AM I am trying to get this working and I am lost. I copied the finished script and saved it as a php file and it returns:
url: ch: 6882688726
Am I supposed to change a variable in the script with a different URL? I am new to php so any tips would be appreciated.
Colin Dunstan 07-22-2004, 05:43 AM The 'ch' stands for checksum.
The PageRank files can be requested directly from the domain www.google.com. Basically, the URLs for those files look like follows (without line breaks):
http://www.google.com/search?
client=navclient-auto&
ch=0123456789&
features=Rank&
q=info:http://www.domain.com/
replace 0123456789 with the checksum you receive for your site, and replace http://www.domain.com/ with your sites url you calculated the checksum for.
Codrut 07-22-2004, 01:09 PM I've read all the work you done guys here, and it is excellent. So I took the php and adapt/put it to work.
Strange things happens, as for example google turns to me a page which begins with:
Forbidden
Your client does not have permission to get URL .... etc
Do you have any idea why ?
There are many pages that gives me that response, one example is www.addidas.3x.ro
May thanks, Codrut
Unregistered 07-22-2004, 02:27 PM Yah, if I go to that URL with my browser I get the same error. Is the script supposed to request that URL? How do I get it to return the actual Page Rank like the page here?
http://www.zenitram.th4y.com/pagerank
Ghank 07-22-2004, 08:24 PM Got it working! I modified the script to check the PR of my links database. But I found a problem, When the URL includes a plus sign (+), it returns the following error: XML error: no element found at line 1
I changed the + to it's hex equivalent %2b and it does the same. Any ideas on how to fix it? Most people only want to check a domain PR, but I'd like to check individual pages some which have special characters.
Thanks to the people that put this script together!
Codrut 07-23-2004, 10:24 AM Ghunk, can you verify www.ad4u.ro ? Is it working at you ?
I get some Forbidden message.
Raistlin Majere 07-23-2004, 11:10 AM <html>
<body style="font-family:verdana;font-size:13px">
<?php
/*
This code is released unto the public domain
Raistlin Majere euclide@email.it
*/
define('GOOGLE_MAGIC', 0xE6359A60);
//unsigned shift right
/*
 * Zero-filling right shift of $a by $b bits.
 * When bit 31 of $a is set, the shift is done in two stages so that the
 * arithmetic >> cannot smear the top bit: shift by 1, clear bit 31, set
 * bit 30, then shift by the remaining $b-1 bits.
 */
function zeroFill($a, $b)
{
    $topBit = hexdec(80000000);
    if (!($topBit & $a)) {
        return ($a >> $b);
    }
    $shifted = ((($a >> 1) & (~$topBit)) | 0x40000000);
    return ($shifted >> ($b - 1));
}
/*
 * Runs three mixing rounds over ($a, $b, $c) and returns the new values as
 * array($a, $b, $c). Each round applies, in this order:
 *   a = (a - b - c) ^ (c >>> s0)
 *   b = (b - c - a) ^ (a <<  s1)
 *   c = (c - a - b) ^ (b >>> s2)
 * with (s0, s1, s2) taken from the table below; >>> is done by zeroFill().
 */
function mix($a,$b,$c) {
    $rounds = array(
        array(13,  8, 13),
        array(12, 16,  5),
        array( 3, 10, 15),
    );
    foreach ($rounds as $shift) {
        $a -= $b; $a -= $c; $a ^= (zeroFill($c, $shift[0]));
        $b -= $c; $b -= $a; $b ^= ($a << $shift[1]);
        $c -= $a; $c -= $b; $c ^= (zeroFill($b, $shift[2]));
    }
    return array($a, $b, $c);
}
/*
 * Hashes an array of character codes and returns the third state word.
 *
 * $url    - array of integers indexed from 0 (get_pr() builds it with
 *           strord()); despite the name it is not a string.
 * $length - number of entries to hash; defaults to sizeof($url).
 * $init   - initial value of the third state word; defaults to GOOGLE_MAGIC.
 *
 * Returns $c after the final mix(). The caller formats it with "%u" and
 * prefixes "6" to build the ch parameter.
 */
function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC) {
if(is_null($length)) {
$length = sizeof($url);
}
$a = $b = 0x9E3779B9;
$c = $init;
$k = 0;
$len = $length;
// Consume 12 entries per pass: three little-endian 32-bit words, added to
// a, b and c respectively, followed by one mix().
while($len >= 12) {
$a += ($url[$k+0] +($url[$k+1]<<8) +($url[$k+2]<<16) +($url[$k+3]<<24));
$b += ($url[$k+4] +($url[$k+5]<<8) +($url[$k+6]<<16) +($url[$k+7]<<24));
$c += ($url[$k+8] +($url[$k+9]<<8) +($url[$k+10]<<16)+($url[$k+11]<<24));
$mix = mix($a,$b,$c);
$a = $mix[0]; $b = $mix[1]; $c = $mix[2];
$k += 12;
$len -= 12;
}
// $len is now the number of leftover entries (0..11). The TOTAL length goes
// into c, and the leftover entries are folded in by deliberate fall-through.
$c += $length;
switch($len) /* all the case statements fall through */
{
case 11: $c+=($url[$k+10]<<24);
case 10: $c+=($url[$k+9]<<16);
case 9 : $c+=($url[$k+8]<<8);
/* the first byte of c is reserved for the length */
case 8 : $b+=($url[$k+7]<<24);
case 7 : $b+=($url[$k+6]<<16);
case 6 : $b+=($url[$k+5]<<8);
case 5 : $b+=($url[$k+4]);
case 4 : $a+=($url[$k+3]<<24);
case 3 : $a+=($url[$k+2]<<16);
case 2 : $a+=($url[$k+1]<<8);
case 1 : $a+=($url[$k+0]);
/* case 0: nothing left to add */
}
$mix = mix($a,$b,$c);
/*-------------------------------------------- report the result */
return $mix[2];
}
// Converts a string into an array of integers, one per byte: element $i is
// ord() of the byte at offset $i. An empty string yields an empty array
// (previously $result was never initialised in that case).
// Uses $string[$i]; the older $string{$i} offset syntax was removed in PHP 8.
function strord($string) {
    $result = array();
    $count = strlen($string);
    for ($i = 0; $i < $count; $i++) {
        $result[$i] = ord($string[$i]);
    }
    return $result;
}
/*
 * Fetches the toolbar rank response for $url.
 *
 * Returns array($contents, $rank):
 *   $contents - the raw response body, or "Connection unavailable" when the
 *               request could not be opened, or "" when $url is empty or
 *               just "http://";
 *   $rank     - the third ':'-separated field of the response
 *               (e.g. "Rank_1:1:5" gives "5"), or -1 when it is absent.
 *
 * "http://" is prepended when $url does not already start with it.
 *
 * Fixes:
 *  - The URL used to be concatenated into the sprintf() FORMAT string, so a
 *    URL containing '%' (for example "%2b") was parsed as a conversion
 *    specification and broke the request. Only the checksum is formatted now.
 *  - The q parameter is URL-encoded, so '+', '&', '?' and '#' in the target
 *    URL reach the server intact.
 */
function get_pr($url) {
    $result = array("", -1);
    if (($url."" != "") && ($url."" != "http://")):
        // check for protocol
        if (substr(strtolower($url), 0, 7) != "http://"):
            $url = "http://".$url;
        endif;

        // The checksum is computed over "info:" + the unencoded URL.
        $checksum = GoogleCH(strord("info:".$url));
        $google_url = "http://www.google.com/search?client=navclient-auto"
            ."&ch=".sprintf("6%u", $checksum)
            ."&features=Rank"
            ."&q=info:".urlencode($url);

        $contents = "";
        // let's get ranking
        // this way could cause problems because the Browser Useragent is not set...
        if ($handle = fopen($google_url, "rb")):
            while (true):
                $data = fread($handle, 8192);
                if (strlen($data) == 0):
                    break;
                endif;
                $contents .= $data;
            endwhile;
            fclose($handle);
        else:
            $contents = "Connection unavailable";
        endif;
        $result[0] = $contents;

        // Rank_1:1:0 = 0
        // Rank_1:1:5 = 5
        // Rank_1:1:9 = 9
        // Rank_1:2:10 = 10 etc
        $p = explode(":", $contents);
        if (isset($p[2])):
            $result[1] = $p[2];
        endif;
    endif;
    return $result;
}
// GET IT !
// Read the requested URL defensively: the first page load has no ?url=.
$requested_url = isset($_GET["url"]) ? $_GET["url"] : "";
$pr = get_pr($requested_url);
$output = $pr[0];
$pagerank = $pr[1];
// Everything echoed below is request data or a remote response, so it is
// passed through htmlspecialchars() to keep markup from being injected.
?>
<br>
<form method="get" action="?r=<?=time()?>">
<center>
<table style="font-family:verdana;font-size:13px">
<tr>
<td valign="top"><b>Url</b></td>
<td valign="top"><?=htmlspecialchars($requested_url, ENT_QUOTES)?></td>
</tr>
<tr>
<td valign="top"><b>Page<br>Rank</b></td>
<td valign="top" style="color:red"><b><?=htmlspecialchars((string)$pagerank, ENT_QUOTES)?></b></td>
</tr>
<tr>
<td valign="top"><b>Google<br>output</b></td>
<td valign="top"><div style="overflow:auto;width:250px;height:150px;background-color:#eeeeee;border:1px solid #aaaaaa"><?=htmlspecialchars($output, ENT_QUOTES)?></div></td>
</tr>
</table>
<br>
<input type="text" name="url" size="80" value="<?=htmlspecialchars(isset($_GET['url'])?$_GET['url']:"http://", ENT_QUOTES)?>"><br>
<input type="submit">
</center>
</form>
</body>
</html>
ghank 07-23-2004, 01:32 PM Codrut, that URL works fine.
However this one doesn't because it has a + or a %2b in it:
http://www.trimbody.com.au/resources.health%2binsurance.html
http://www.trimbody.com.au/resources.health+insurance.html
Cooper 07-25-2004, 08:07 PM First of all, Kudos to all the wonderful ppl who accomplished this. I'm quite sure you've given Larry something to think about. :laugh3: It's only a matter of time until we know whether it's gonna last long, which I hope it would.
Even though I'm quite disappointed in missing the fun, there's atleast some hope with http://help.yahoo.com/help/us/companion/webrank. It seems there's already one out, http://www.digitalpoint.com/tools/webrank . Hope to work with u guyz soon. :deal:
doctorow 07-26-2004, 06:06 AM The problem is that Yahoo uses a special Yahoo Toolbar to enable the WebRank feature, and only beta testers have/had access to this toolbar. And the beta test group is currently filled. So if anyone has a working link to the beta toolbar... would be very useful. The following link used to download the beta toolbar, but it doesn't seem to work anymore: http://companion.yahoo.com/config/slv4_done?.act=3&.dflt=1&.intl=us&.region=us&.partner=none&.guest=none&.cpdl=b543
Cooper 07-26-2004, 10:02 AM yup.. seems like it. uneasy about the "next version" mentioned. wonder whether it's another beta (http://beta.toolbar.yahoo.com). anyway the toolbar itself is avaiable at http://toolbar.yahoo.com. havent tried it yet though.
one more thing. Does anyone know the exact headers used by the toolbar to retrieve the PR. (I don't use it any longer due to it's privacy issues ;)). However, if we do know it, then we could simulate the toobar itself to be on the safe side when sending our requests. I found this one a bit interesting http://www.httprevealer.com/usage_google.htm. Want to know what u guys think.
doctorow 07-26-2004, 12:45 PM Yes, but the toolbar available for download doesn't include the PageRank feature (or I am just too blind to see it) ;(
Btw, someone wrote a Javascript version of the algorithm and implemented it as a Firefox extension: http://www.tapouillo.com/firefox_extension/
Cooper 07-26-2004, 06:14 PM Ya, I'm aware of that. Also the PR I reffered to was for the Google toolbar. Yahoo has WebRank, right ? ;) anyway, I think thier toolbar is more off to combat spyware rather than promote rankings.
how often does google update thier toolbar version, I'm using GoogleToolbar 2.0.111-as the agent now. btw, the firefox extension is neat.. that eliminates the toolbar far more easily.
Cooper 07-27-2004, 06:46 AM However this one doesn't because it has a + or a %2b in it:
http://www.trimbody.com.au/resources.health%2binsurance.html
http://www.trimbody.com.au/resources.health+insurance.html
What are the actual PR's for these URLs ? (again, I don't use GTb :D )
Btw, I just wrote a mod, which simulates the Toolbar as it is using sockets. However, I'm still waiting for confirmation of the actual headers to make sure it's off the limits for suspicion. Will post it once it's done.
doctorow 07-27-2004, 12:47 PM Cooper, any idea how I can get the Yahoo beta toolbar? Would be interested to have a look at the WebRank feature...
acidophil 07-27-2004, 09:13 PM hi y-all!
yes, y!-WR whould be nice coz i think y! will be kick googles b...t soo.
but atm googles PR is relevant
and so i was wondering if someone has figured it out to get the checksum with a perl script??? :crowngrin
btw has anyone re-checked the PRs, coz google tend to change them - delivering wrong PRs!
THX
acidophil
Cooper 07-27-2004, 10:32 PM Quoted From Google PageRank, Meet Yahoo WebRank (http://searchenginewatch.com/sereport/article.php/3334891)
Where do the scores come from? Yahoo's help pages say the value is calculated by anonymously monitoring what people visit. So if many toolbar users visit particular sites, that might be a factor in helping the sites get a better Web Rank score.
But hold on! When I asked Yahoo earlier this month, I was told that the score is instead determined by Yahoo's search algorithm. That means visits to a site are not counted to create Web Rank. Instead, it's apparently link popularity based, as with Google's PageRank. Unfortunately, Yahoo hasn't yet corrected its help pages to reflect this.
A somewhat outdated article, but still pretty interesting. Jerry Rank ? LOL :D
However, so far, IMO Yahoo WR is practically useless . As I've heard in many places it does not show any rank for most of the toolbar (beta) users. So it's most likely that thier beta testing was primarily to keep the spies out. (I still wonder, how digitalpoint got thier hands on the webrank tool). The link you posted earlier was suppose to be the final know release of the beta version. However, it now downloads the normal toolbar. I tried it, but didn't quite convice me to keep it for more than 5 mins. :laugh3: Anyway, I had a few friends who were talking abt it back in april, so i guess they do have the beta versions. I'll check with 'em.
In the mean time, Google agents aren't uniform. I've been trying different stuff to accomplish high anonymity. However, unless run in client-side (like the firefox extension), it's hard to avoid sending unnoticeable requests from a single server (talking abt large volumes, like the PR tool at seo-guy). One option would be using socks. but thier availablity is totally undesireable. So I'm currently working with javascript to make the requests from the client side and obfuscating the html so that no one is aware of what's happening. :dizzy2: (specially G)
A JSP/Servlet-Applet combination which has both server & client side control would be nice. But only a few hosts run TomCat. Btw, I'll try to write a perl version if I do get some time for it.
Also, for those who need the PR displayed on thier site, here's an easier way, http://www.pagerank.net/display-pagerank.php
P.S: I just heard that your script is circulating on P2P, some at a cost. :ears:
this is completely done with javascript requiring no backend php or cgi. it also seems to be platform independent. props go to here (http://www.digitalpoint.com/tools/pagerank-mac/) for the original javascript. it doesn't handle urls with parameters (like the url of this page in the thread), but i haven't seen any of the other implementations handle these correctly, either (they chop off the parameters). just prepend 'javascript' + ':'.
function%20zeroFill(a,b){var%20z=2147483648;if(z&a){a=(a>>1);a&=(~z);a|=0x40000000;a=(a>>(b-1));}else{a=(a>>b);}return%20a;}function%20mix(a,b ,c){a-=b;a-=c;a^=(zeroFill(c,13));b-=c;b-=a;b^=(a<<8);c-=a;c-=b;c^=(zeroFill(b,13));a-=b;a-=c;a^=(zeroFill(c,12));b-=c;b-=a;b^=(a<<16);c-=a;c-=b;c^=(zeroFill(b,5));a-=b;a-=c;a^=(zeroFill(c,3));b-=c;b-=a;b^=(a<<10);c-=a;c-=b;c^=(zeroFill(b,15));return%20Array(a,b,c);}func tion%20GoogleCH(url){var%20length=url.length;a=b=0 x9E3779B9;c=0xE6359A60;k=0;len=length;while(len>=1 2){a+=(url[k+0]+(url[k+1]<<8)+(url[k+2]<<16)+(url[k+3]<<24));b+=(url[k+4]+(url[k+5]<<8)+(url[k+6]<<16)+(url[k+7]<<24));c+=(url[k+8]+(url[k+9]<<8)+(url[k+10]<<16)+(url[k+11]<<24));mixvar=mix(a,b,c);a=mixvar[0];b=mixvar[1];c=mixvar[2];k+=12;len-=12;}c+=length;switch(len){case%2011:c+=(url[k+10]<<24);case%2010:c+=(url[k+9]<<16);case%209:c+=(url[k+8]<<8);case%208:b+=(url[k+7]<<24);case%207:b+=(url[k+6]<<16);case%206:b+=(url[k+5]<<8);case%205:b+=(url[k+4]);case%204:a+=(url[k+3]<<24);case%203:a+=(url[k+2]<<16);case%202:a+=(url[k+1]<<8);case%201:a+=(url[k+0]);}mixvar=mix(a,b,c);return%20mixvar[2];}function%20strord(string){var%20result=new%20Arr ay();for(i=0;i<string.length;i++){result[i]=string.charCodeAt(i);}return%20result;}var%20ch=G oogleCH(strord('info:'+location.href));location.hr ef='http://google.com/search?client=navclient-auto&ch=6'+ch+'&features=Rank&q=info:'+location.href
this is completely done with javascript...
somehow a few spaces were automatically inserted in the code- they need to be stripped.
Unregistered 08-02-2004, 09:39 AM I was looking for this thing from some time thanks a lot all of u people.
http://www.freeware-alternative.uni.cc/link/b1.jpg
Deepesh Agarwal,
http://www.freeware-alternative.uni.cc/ (http://www.freeware-alternative.uni.cc/)
Visit website for Award-Wining Freeware's like: -
Easy Unistaller, The Ad-Police (Updated) - Now Blocks 14000+ Ad-Servers ,Service Controller XP(Updated), Dr.System 2.1 (New Build), Easy CDTray , Quick Deployment Wizard 1.0 - A Easy to Use Free Setup creator with High Compression, and Many More.....
acidophil 08-06-2004, 05:44 PM somehow a few spaces were automatically inserted in the code- they need to be stripped.
*gmpf*
i tried to skip the %20 and rebuild some LFs but i couldn't get it working...
ok i don't imported it as a sniplet to mozilla mac (on my old littl' black (pismo)) but i thought i could use it as an inline js with document.write
is the source somewere D/L-able? if possible as .js too ;)
THX
acidophil
pS.: searching for a perl script computing ch and an actionscript too!
Unregistered 08-11-2004, 05:12 PM this code really goes, but there is one mistake.
the right headers looks like this toolbarqueries.google.com not www.google.com its difference!
reply to xxxvendor@email.cz
Unregistered 08-11-2004, 08:20 PM Somebody on http://blog.outer-court.com/forum/ asked for a VBScript version. I posted my take at it here:
http://tumanov.com/projects/scriptlets/googlepagerankchecksum.asp
barranquilla 08-22-2004, 04:06 PM I tried to port the checksum algorithm to Java and I had some partial success. However, I think there's a problem with the way Java manages hex numbers (unsigned longs, maybe?). Here's the source:
import java.io.*;
/**
 * Computes the 32-bit checksum used for the toolbar "ch" parameter.
 *
 * <p>The original port did all arithmetic in {@code long}. Hex literals such
 * as {@code 0x9E3779B9} and {@code 0x80000000} are negative {@code int}s that
 * were sign-extended on assignment, and nothing ever wrapped at 32 bits, so
 * the results were wrong. Every step of the algorithm is defined modulo 2^32,
 * so this version computes in {@code int} (which wraps exactly that way),
 * uses {@code >>>} for the zero-filling right shift (the hand-written
 * {@code zeroFill} helper is no longer needed), and only widens to an
 * unsigned value in {@code long} at the public boundary.
 */
public class GoogleRank {
    /** Initial value of the third state word. */
    public int GOOGLE_MAGIC = 0xE6359A60;

    /** Initial value of the first two state words. */
    private static final int INITIAL_AB = 0x9E3779B9;

    /** Selects the low 32 bits of a long, i.e. an int read as unsigned. */
    private static final long UINT32_MASK = 0xFFFFFFFFL;

    /**
     * Mixes three 32-bit words.
     *
     * @param a first word (only the low 32 bits are used)
     * @param b second word (only the low 32 bits are used)
     * @param c third word (only the low 32 bits are used)
     * @return the mixed words {a, b, c}, each in the range [0, 2^32)
     */
    public long[] mix(long a, long b, long c) {
        int x = (int) a;
        int y = (int) b;
        int z = (int) c;

        x -= y; x -= z; x ^= (z >>> 13);
        y -= z; y -= x; y ^= (x << 8);
        z -= x; z -= y; z ^= (y >>> 13);

        x -= y; x -= z; x ^= (z >>> 12);
        y -= z; y -= x; y ^= (x << 16);
        z -= x; z -= y; z ^= (y >>> 5);

        x -= y; x -= z; x ^= (z >>> 3);
        y -= z; y -= x; y ^= (x << 10);
        z -= x; z -= y; z ^= (y >>> 15);

        return new long[] { x & UINT32_MASK, y & UINT32_MASK, z & UINT32_MASK };
    }

    /** Reads four consecutive characters starting at k as a little-endian word. */
    private static int word(String s, int k) {
        return s.charAt(k)
                + (s.charAt(k + 1) << 8)
                + (s.charAt(k + 2) << 16)
                + (s.charAt(k + 3) << 24);
    }

    /**
     * Hashes {@code url}, which must already include the "info:" prefix.
     *
     * <p>Characters are used as returned by {@code charAt}; the algorithm is
     * byte-oriented, so the input is assumed to contain only 8-bit characters.
     *
     * @param url the text to hash
     * @return the checksum as an unsigned 32-bit value in [0, 2^32); the ch
     *         parameter is "6" followed by this number in decimal
     */
    public long GoogleCH(String url) {
        int a = INITIAL_AB;
        int b = INITIAL_AB;
        int c = GOOGLE_MAGIC;
        int length = url.length();
        int len = length;
        int k = 0;

        // Full 12-character blocks: three words, then one mix.
        while (len >= 12) {
            a += word(url, k);
            b += word(url, k + 4);
            c += word(url, k + 8);
            long[] mixed = mix(a, b, c);
            a = (int) mixed[0];
            b = (int) mixed[1];
            c = (int) mixed[2];
            k += 12;
            len -= 12;
        }

        // Tail: the total length goes into c, then the remaining 0..11 characters.
        c += length;
        switch (len) { /* all the case statements fall through */
            case 11: c += (url.charAt(k + 10) << 24);
            case 10: c += (url.charAt(k + 9) << 16);
            case 9 : c += (url.charAt(k + 8) << 8);
            /* the first byte of c is reserved for the length */
            case 8 : b += (url.charAt(k + 7) << 24);
            case 7 : b += (url.charAt(k + 6) << 16);
            case 6 : b += (url.charAt(k + 5) << 8);
            case 5 : b += (url.charAt(k + 4));
            case 4 : a += (url.charAt(k + 3) << 24);
            case 3 : a += (url.charAt(k + 2) << 16);
            case 2 : a += (url.charAt(k + 1) << 8);
            case 1 : a += (url.charAt(k));
            /* case 0: nothing left to add */
        }

        /*-------------------------------------------- report the result */
        return mix(a, b, c)[2];
    }

    /** Prints the ch value for the thread's reference URL. */
    public static void main(String[] args) {
        try {
            GoogleRank g = new GoogleRank();
            long checksum = g.GoogleCH("info:http://www.example.com/");
            System.out.println("ch: 6" + checksum);
        } catch (Exception f) {
            System.out.println("Exception: " + f.toString());
        }
    }
}
Any pointers on what may be wrong are very appreciated...
Unregistered 08-28-2004, 04:27 AM Windows tool based on above's code:
http://www.my-eldorado.net/GGChecksumRecovery.zip
This has been moved to...
http://www.my-eldorado.net/GGChecksumRecovery_win32.zip
The also posted the great PHP script you guys put together, though sadly declined to include you, the authors names...
/*
This code is released unto the public domain
*/
While I'm posting, I just wanted to say:
1) Great to see 3 of you working together to produce all this code.
2) That I respect and admire you all, for this great work.
3) Reading your collaborative efforts has made me decide to learn programming. I realise I want to "play".
You guys all rock and it has been great reading this long thread.
Well Done.
- David (Australia)
ZeNiTRaM 09-02-2004, 08:02 AM My PageRank Calculator gets back.. but with a new name and new features.
Now it's GoogleGetInfo and gets PageRank and 10 parameters more.
Check it at http://dhost.info/zeni/google-info
It gets all the parameters from Google in XML and returns them in an array with those parameters:
//0: URL
//1: Title
//2: PageRank. The Most Wanted!
//3: Last Crawled Date. Syntax: " 31 Aug 2004". Note the first space, use trim() to remove it.
//4: Page Text that appears in Google ;)
//5: Language of the page (2 letters)
//6: URL in the Directory. Get to http://www.google.com/<this> to see it.
//7: Title in the Directory.
//8: Description in the Directory.
//9: File Size. Ex: "59k"
//10: Looks like a checksum of the page, not sure
Read googlegetinfo.php comments for info.
ZeNiTRaM 09-02-2004, 08:04 AM Note: The PageRank calculator URL (http://zenitram.th4y.com/pagerank) won't work anymore, the server disappeared...
mosert 09-09-2004, 11:18 AM this php's program won't certainly work for a long time : google introduced yesterday a new version of the toolbar (v 2.0.114) and changed the checksum algorithm
Maybe a little change, maybe not...
Can someone try to decompilate and see ?
Mosert
for information, the 4 last version of the toolbar were :
* version 2.0.111*: 2004-05-14
* version 2.0.112*: 2004-07-15
* version 2.0.113*: 2004-07-30
* version 2.0.114*: 2004-09-09
doctorow 09-09-2004, 11:50 AM Mosert, can you show me how the checksum has changed in 2.0.114 since 2.0.113- like show me the CH of the same URL for both versions?
mosert 09-09-2004, 02:33 PM look at this:
http://www.prweaver.com/blog/2004/09/09/10-algorithme-cheksum-toolbar
doctorow 09-09-2004, 04:52 PM Thanks for the info, mosert.
A first glimpse into v2.0.114.1 reveals that the Bob Jenkins hash is applied twice now:
1. bobJenkinsHash(info:url) // results in the old checksum seed like used in older toolbar versions
2. generate a 80 byte table from the hash in 1.
3. bobJenkinsHash(80 byte table) // which is the new checksum seed
So all that is necessary is to add the code for converting the first hash into the 80 byte table. I am going to add the assembler snippets in a bit here.
Doc
mosert 09-10-2004, 10:09 AM how did you find these conclusions without the assembler snippet ? ;)
now game has opened : we are toying with google !
doctorow 09-10-2004, 10:12 AM I do have the snippet ;) Just have to clean it up before posting them here.
Colin Dunstan 09-10-2004, 06:25 PM So here is the promised update, in assembler code:
Before Toolbar v2.0.114, this was basically the code to calculate the CH:
; Call site of the hash as it looked before Toolbar v2.0.114 (32-bit x86
; disassembly, Intel syntax). Three dwords are pushed and _bobJenkinsHash is
; called; the code that follows reads the 32-bit result from eax.
CHStart:
mov esi, 0E6359A60h ; GOOGLE_MAGIC
push esi ; pushed first: the magic initial value
push dword ptr [ebp-3Ch] ; info:URL size
push eax ; info:URL
call _bobJenkinsHash
After that, one only had to convert eax to an unsigned integer and prepend CH=6.
In Toolbar v2.0.114, Google doesn't stop here. The code goes on:
; Extra stage added in Toolbar v2.0.114: the first hash is mutated, expanded
; into an 80-byte table on the stack, and that table is hashed again.
newCode:
push eax ; resulting 32bit hash (used as final hash in toolbar < 2.0.114)
call _32bitMutation
add esp, 10h ; drops 16 bytes of arguments - presumably this push plus the three pushed for the earlier hash call
mov [ebp+0Ch], eax ; keep the mutated hash
xor eax, eax ; eax = running offset: 0, 9, 18, ...
lea ecx, [ebp-0B0h] ; ecx = write pointer into the table
; Runs for eax = 0, 9, ..., 171, i.e. 20 times, storing one dword each time:
; entry i = mutated hash - 9*i. 20 dwords = 80 bytes.
generate80ByteTable:
mov edx, [ebp+0Ch]
sub edx, eax
mov [ecx], edx
add eax, 9
add ecx, 4
cmp eax, 180
jb short generate80ByteTable
push esi ; GOOGLE_MAGIC (0xE6359A60)
lea eax, [ebp-0B0h]
push 80 ; Table size
push eax ; Table offset
call _bobJenkinsHash
with function _32bitMutation:
; _32bitMutation(hash): takes one dword argument on the stack and returns
;   eax = ((hash / 7) << 2) | ((hash % 13) & 7)
; using unsigned division. esi is saved and restored; ecx and edx are
; overwritten. The plain "retn" leaves the argument for the caller to remove.
_32bitMutation proc near
hash = dword ptr 4
mov eax, [esp+4] ; eax = hash argument
push esi ; save esi (esp moves down by 4)
push 0Dh
pop ecx ; ecx = 13
xor edx, edx ; clear the high half of the dividend edx:eax
div ecx ; eax = hash / 13, edx = hash % 13
mov eax, [esp+4+hash] ; reload hash: [esp+8] now, because of the push above
push 7
pop esi ; esi = 7
mov ecx, edx ; ecx = hash % 13
xor edx, edx ; clear the high half of the dividend again
div esi ; eax = hash / 7
and ecx, 7 ; ecx = (hash % 13) & 7
pop esi ; restore esi
shl eax, 2 ; eax = (hash / 7) << 2
or eax, ecx ; combine both parts into the result
retn
_32bitMutation endp
So as I said before, the Bob Jenkins hash is applied twice now:
1. bobJenkinsHash(info:url)
2. the resulting hash (32bit) undergoes a small modification
3. generate an 80-byte table from the modified hash of step 2.
4. bobJenkinsHash(80 byte table)
After that, like in previous toolbar versions, you convert eax to an unsigned integer and prepend CH=6.
The remaining step is to transcode the assembler snippets into high-level again.
Unregistered 09-14-2004, 03:41 AM I've got a job now so dont have rediculous amounts of free time to update my code. However I will give it a shot whenever I get a chance if nobody else beats me too it.
Unregistered 09-14-2004, 03:31 PM Ok, got some spare time tonight, going to comment the new code so we can get an idea of whats going on with the new code
Unregistered 09-14-2004, 04:27 PM Can you please post a FULL ASM dump of the hash code please, just in case any of the registers being used have changed or something.
alexstapleton 09-14-2004, 04:57 PM _32bitMutation proc near
hash = dword ptr 4
//why is this named hash?
//it seems to be more or less the same as var_4 in the old version
//but positive of course
mov eax, [esp+4] //eax becomes equal to the 4th URL char
$eax = $url[3];
push esi //esi to stack
//esi is the result of the 4th XOR round of BJHash
//i havent seen the latest dump yet though so this could be wrong
push 0Dh // ecx = 13 }
pop ecx // ecx = 13 }
xor edx, edx // edx = 0
$edx = 0;
div ecx // eax = eax/13, edx = eax%13 (ecx stays 13)
$eax = (int)($eax/13);
mov eax, [esp+4+hash] //eax becomes equal to the 8th URL char
$eax = $url[7];
push 7 // esi = 7 }
pop esi // esi = 7 }
$esi = 7;
mov ecx, edx // ecx = remainder of the DIV
$ecx = $eax%13;
xor edx, edx // edx = 0
$edx = 0;
div esi // eax DIV 7
$eax = $eax/7;
and ecx, 7 // ecx AND 7
$ecx = $ecx & 7;
pop esi // return the value to esi we saved at the start
$esi = $BJHash[3][0];
shl eax, 2 //multiply eax by 4
$eax = ($eax << 2);
or eax, ecx //eax OR ecx
$eax = $eax | $ecx;
retn
alexstapleton 09-14-2004, 05:02 PM please check my above ASM with bits o PHP stuck in for mistakes please
No idea if this gives the right output or not but easier to read.
$p is called so because the 4th char should almost always p
$w is called so because the 8th char should not quite almost always be w
e.g. http://www
capiche?
ive converted this to high level somewhat
/*
 * Applies the toolbar's 32-bit "mutation" to the first hash:
 *
 *     ((hash / 7) << 2) | ((hash % 13) & 7)
 *
 * with unsigned integer division, as done by the _32bitMutation routine.
 *
 * $url    - unused; kept so existing calls keep working.
 * $BJHash - the 32-bit result of the first hash, as a number. A negative
 *           value is treated as the signed view of an unsigned 32-bit
 *           number.
 *
 * Fix: the previous body worked on characters of $url instead of the hash,
 * threw away the division by 7, and OR-ed the remainder with itself.
 */
function Mutate($url, $BJHash) {
    $hash = $BJHash;
    if ($hash < 0) {
        $hash += 4294967296;    // reinterpret as unsigned 32-bit
    }
    // floor()/fmod() keep the arithmetic correct even where the unsigned
    // value does not fit in a native integer.
    $quotient  = (int) floor($hash / 7);
    $remainder = (int) fmod($hash, 13);
    return ($quotient << 2) | ($remainder & 7);
}
Hi all,
I've just done all the work in C, including a small checksum calculator.
Enjoy and don't forget to greet me in your upcoming tools ;)
http://pagerank.mindsay.com/
Tarakan
alexstapleton 09-14-2004, 06:57 PM almost figured out the 80byte table bit, will post findings tomorrow. need sleep!
someone check my 32 bit mutate stuff and report back!!!
alexstapleton 09-14-2004, 07:01 PM ah right gprm beat me too it, nice one. if nobody has got a php version up by tomorrow evening ill do one. (damned jobs getting in the way of geekdom)
doctorow 09-15-2004, 05:50 AM Nice job guys!
gprm, I hope you don't mind me posting your c source here? ;)
/*
gET my pAGE rANK mOFO !
A piece of code made for fun. This is an extract from GPRM.
Don't take this checksum thing too seriously.
Google can't reasonably expect to protect his technology with a table look-up hash algorithm ?!
gprm@altern.org
This code is published under the GNU Public License.
If you use it, you have to explicitely mention GPRM and me.
Tarakan
*/
// checksum calculation, plain C
// Working buffers for the two-stage checksum.
// NOTE(review): `i` (the loop counter below) is not declared in this extract and
// `site` is never filled in here - presumably both are set up in the full program.
char data[160];
char url[500];
unsigned int prbuf[20],l;
char site[150];
// String to hash: the literal "info:" followed by the site.
strcpy(data, "info:");
strcat(data, site);
// Stage 1: hash the string with the seed 0xe6359a60.
l=hash((unsigned char *)data, strlen(data), 0xe6359a60);
// Fold the hash: quotient by 7 shifted left two bits, OR-ed with the low
// three bits of the remainder modulo 13.
l=(((l/7) << 2) | ((l%13)&7));
// Build a 20-word table: the folded value, then each entry 9 less than the previous one.
prbuf[0]=l;
for(i=1; i<20; i++) prbuf[i]=prbuf[i-1]-9;
// Stage 2: hash the table as 80 raw bytes with the same seed; `l` holds the result.
l=hash((BYTE *)prbuf, 80, 0xe6359a60);
doctorow 09-15-2004, 05:51 AM Alex, I know what you mean btw. Often also the opposite is true: geekdom is getting in the way of my job ;)
alexstapleton 09-15-2004, 07:34 AM $ch = mix($a,$b,$c);
// Fold the hash: quotient by 7 shifted left two bits, OR-ed with the low
// three bits of the remainder modulo 13.
$ch=((($ch/7) << 2) | (($ch%13)&7));
// Build a 20-entry table: the folded value, then each entry 9 less than the previous one.
$prbuf = array();
$prbuf[0]=$ch;
for($i=1; $i<20; $i++) {
$prbuf[$i]=$prbuf[$i-1]-9;
}
// NOTE(review): the C extract posted earlier calls hash() on the table at this
// step - confirm that mix() is really the intended function here.
$ch=mix($prbuf, 80, 0xe6359a60);
printf('CH: 6%u', $ch);
Jelko 09-15-2004, 09:58 AM Hm I tried to implement Tarakan's and Alex's code, but somehow the resulting checksum is not right:
<?php
/*
Written and contributed by
Alex Stapleton,
Andy Doctorow,
Tarakan,
Bill Zeller,
Vijay "Cyberax" Bhatter
This code is released unto the public domain
*/
header("Content-Type: text/plain; charset=utf-8");
define('GOOGLE_MAGIC', 0xE6359A60);
// Unsigned (zero-filling) right shift of $a by $b bits, for 32-bit values.
//
// PHP's >> keeps the sign, so when bit 31 of $a is set the value is first
// shifted by one, the sign bit is cleared, the bit that moved into position 30
// is put back, and the remaining $b-1 positions are shifted normally.
//
// $a - value to shift
// $b - number of bit positions (0 or more)
// Returns the shifted value.
function zeroFill($a, $b)
{
    // A shift by zero changes nothing. Without this guard the high-bit branch
    // below would shift by ($b - 1) == -1, which is not a valid shift count.
    if ($b == 0) {
        return ($a >> 0);
    }
    // 0x80000000, the bit-31 mask. Passed as a string so hexdec() does not
    // depend on an integer being converted to text first.
    $z = hexdec('80000000');
    if ($z & $a) {
        $a = ($a >> 1);
        $a &= (~$z);
        $a |= 0x40000000;
        $a = ($a >> ($b - 1));
    } else {
        $a = ($a >> $b);
    }
    return $a;
}
// Scrambles the three words $a, $b and $c against each other in three passes.
// Every step subtracts the other two words from one word and then XORs in a
// shifted copy of a neighbour (zero-filling right shift, or plain left shift).
// Returns array($a, $b, $c) with the scrambled words.
function mix($a,$b,$c) {
    // Pass 1
    $a = ($a - $b - $c) ^ zeroFill($c, 13);
    $b = ($b - $c - $a) ^ ($a << 8);
    $c = ($c - $a - $b) ^ zeroFill($b, 13);
    // Pass 2
    $a = ($a - $b - $c) ^ zeroFill($c, 12);
    $b = ($b - $c - $a) ^ ($a << 16);
    $c = ($c - $a - $b) ^ zeroFill($b, 5);
    // Pass 3
    $a = ($a - $b - $c) ^ zeroFill($c, 3);
    $b = ($b - $c - $a) ^ ($a << 10);
    $c = ($c - $a - $b) ^ zeroFill($b, 15);
    return array($a, $b, $c);
}
// Hashes $url, an array of byte values, and returns the third word produced by
// the final mix() call.
// $url    - array of numeric byte values to hash
// $length - number of entries to hash; when null, sizeof($url) is used
// $init   - starting value for $c (defaults to GOOGLE_MAGIC)
function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC) {
if(is_null($length)) {
$length = sizeof($url);
}
// $a and $b start from the same constant; $c starts from the supplied seed.
$a = $b = 0x9E3779B9;
$c = $init;
$k = 0;
$len = $length;
// Consume the input 12 entries at a time: each group of four entries is packed
// lowest-entry-first into one word and added to $a, $b and $c in turn, then
// the three words are scrambled with mix().
while($len >= 12) {
$a += ($url[$k+0] +($url[$k+1]<<8) +($url[$k+2]<<16) +($url[$k+3]<<24));
$b += ($url[$k+4] +($url[$k+5]<<8) +($url[$k+6]<<16) +($url[$k+7]<<24));
$c += ($url[$k+8] +($url[$k+9]<<8) +($url[$k+10]<<16)+($url[$k+11]<<24));
$mix = mix($a,$b,$c);
$a = $mix[0]; $b = $mix[1]; $c = $mix[2];
$k += 12;
$len -= 12;
}
// The total length is added to $c before the leftover entries are handled.
$c += $length;
// $len is now the number of leftover entries (fewer than 12). Entering the
// switch at that count and falling through adds every remaining entry.
switch($len) /* all the case statements fall through */
{
case 11: $c+=($url[$k+10]<<24);
case 10: $c+=($url[$k+9]<<16);
case 9 : $c+=($url[$k+8]<<8);
/* the first byte of c is reserved for the length */
case 8 : $b+=($url[$k+7]<<24);
case 7 : $b+=($url[$k+6]<<16);
case 6 : $b+=($url[$k+5]<<8);
case 5 : $b+=($url[$k+4]);
case 4 : $a+=($url[$k+3]<<24);
case 3 : $a+=($url[$k+2]<<16);
case 2 : $a+=($url[$k+1]<<8);
case 1 : $a+=($url[$k+0]);
/* case 0: nothing left to add */
}
// Final scramble; only the third word is returned.
$mix = mix($a,$b,$c);
/*-------------------------------------------- report the result */
return $mix[2];
}
// Converts a string into an array of integers, one per byte, holding the
// numeric value of each character (as given by ord()).
// Returns an empty array for an empty string.
function strord($string) {
    // Start from an empty array so an empty input still returns an array.
    $result = array();
    $length = strlen($string);
    for ($i = 0; $i < $length; $i++) {
        // Square-bracket offsets; the curly-brace form is no longer accepted by PHP.
        $result[$i] = ord($string[$i]);
    }
    return $result;
}
// http://www.example.com/ - Checksum: 6540747202
// The hashed string is the literal prefix "info:" followed by the "url" query parameter.
$url = 'info:'.$_GET['url'];
print("url:\t{$_GET['url']}\n");
// First hash: over the byte values of the prefixed URL.
$ch = GoogleCH(strord($url));
printf("Checksum <2.0.114:\t6%u\n",$ch);
// new since Toolbar 2.0.114
// Fold the first hash: quotient by 7 shifted left two bits, OR-ed with the low
// three bits of the remainder modulo 13.
$ch = ((($ch/7) << 2) | (($ch%13)&7));
// Build a 20-entry table: the folded value, then each entry 9 less than the previous one.
$prbuf = array();
$prbuf[0] = $ch;
for($i = 1; $i < 20; $i++) {
$prbuf[$i] = $prbuf[$i-1]-9;
}
// NOTE(review): strord() walks its argument with strlen()/ord(), i.e. it expects
// a string, while $prbuf is an array of integers - confirm this really produces
// the 80 bytes that GoogleCH() is told to hash.
$ch = GoogleCH(strord($prbuf), 80);
//
printf("Checksum >=2.0.114:\t6%u\n",$ch);
?>
Any idea what is still wrong?
The critical point is this $prbuf array. It should be an array of unsigned int (32bit).
so if you have twenty unsigned int, it's hashed as 20*4=80 bytes (8 bit)
I don't think your strord($prbuf) is doing well here.
Only suggestions as I'm quite ignorant regarding type conversions in php.
Yes, the point here is that you must translate $prbuf into an array of 8 bit integers.
I have modified your code, adding a function for doing that. It still doesn't work with all URLs, because the ***** php is not able to manage 32 bit unsigned integers :angry: :angry: , so the line
$ch = ((($ch/7) << 2) | (($ch%13)&7));
doesn't work if $ch >2147483647 (if $ch has the bit 32 set to 1)
I am looking after a way of solving that.. if anyone has any ideas.. :D
<?php
/*
Written and contributed by
Alex Stapleton,
Andy Doctorow,
Tarakan,
Bill Zeller,
Vijay "Cyberax" Bhatter
traB
This code is released unto the public domain
*/
header("Content-Type: text/plain; charset=utf-8");
define('GOOGLE_MAGIC', 0xE6359A60);
// Unsigned (zero-filling) right shift of $a by $b bits, for 32-bit values.
//
// PHP's >> keeps the sign, so when bit 31 of $a is set the value is first
// shifted by one, the sign bit is cleared, the bit that moved into position 30
// is put back, and the remaining $b-1 positions are shifted normally.
//
// $a - value to shift
// $b - number of bit positions (0 or more)
// Returns the shifted value.
function zeroFill($a, $b)
{
    // A shift by zero changes nothing. Without this guard the high-bit branch
    // below would shift by ($b - 1) == -1, which is not a valid shift count.
    if ($b == 0) {
        return ($a >> 0);
    }
    // 0x80000000, the bit-31 mask. Passed as a string so hexdec() does not
    // depend on an integer being converted to text first.
    $z = hexdec('80000000');
    if ($z & $a) {
        $a = ($a >> 1);
        $a &= (~$z);
        $a |= 0x40000000;
        $a = ($a >> ($b - 1));
    } else {
        $a = ($a >> $b);
    }
    return $a;
}
// Scrambles the three words $a, $b and $c against each other in three passes.
// Every step subtracts the other two words from one word and then XORs in a
// shifted copy of a neighbour (zero-filling right shift, or plain left shift).
// Returns array($a, $b, $c) with the scrambled words.
function mix($a,$b,$c) {
    // Pass 1
    $a = ($a - $b - $c) ^ zeroFill($c, 13);
    $b = ($b - $c - $a) ^ ($a << 8);
    $c = ($c - $a - $b) ^ zeroFill($b, 13);
    // Pass 2
    $a = ($a - $b - $c) ^ zeroFill($c, 12);
    $b = ($b - $c - $a) ^ ($a << 16);
    $c = ($c - $a - $b) ^ zeroFill($b, 5);
    // Pass 3
    $a = ($a - $b - $c) ^ zeroFill($c, 3);
    $b = ($b - $c - $a) ^ ($a << 10);
    $c = ($c - $a - $b) ^ zeroFill($b, 15);
    return array($a, $b, $c);
}
// Hashes $url, an array of byte values, and returns the third word produced by
// the final mix() call.
// $url    - array of numeric byte values to hash
// $length - number of entries to hash; when null, sizeof($url) is used
// $init   - starting value for $c (defaults to GOOGLE_MAGIC)
function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC) {
if(is_null($length)) {
$length = sizeof($url);
}
// $a and $b start from the same constant; $c starts from the supplied seed.
$a = $b = 0x9E3779B9;
$c = $init;
$k = 0;
$len = $length;
// Consume the input 12 entries at a time: each group of four entries is packed
// lowest-entry-first into one word and added to $a, $b and $c in turn, then
// the three words are scrambled with mix().
while($len >= 12) {
$a += ($url[$k+0] +($url[$k+1]<<8) +($url[$k+2]<<16) +($url[$k+3]<<24));
$b += ($url[$k+4] +($url[$k+5]<<8) +($url[$k+6]<<16) +($url[$k+7]<<24));
$c += ($url[$k+8] +($url[$k+9]<<8) +($url[$k+10]<<16)+($url[$k+11]<<24));
$mix = mix($a,$b,$c);
$a = $mix[0]; $b = $mix[1]; $c = $mix[2];
$k += 12;
$len -= 12;
}
// The total length is added to $c before the leftover entries are handled.
$c += $length;
// $len is now the number of leftover entries (fewer than 12). Entering the
// switch at that count and falling through adds every remaining entry.
switch($len) /* all the case statements fall through */
{
case 11: $c+=($url[$k+10]<<24);
case 10: $c+=($url[$k+9]<<16);
case 9 : $c+=($url[$k+8]<<8);
/* the first byte of c is reserved for the length */
case 8 : $b+=($url[$k+7]<<24);
case 7 : $b+=($url[$k+6]<<16);
case 6 : $b+=($url[$k+5]<<8);
case 5 : $b+=($url[$k+4]);
case 4 : $a+=($url[$k+3]<<24);
case 3 : $a+=($url[$k+2]<<16);
case 2 : $a+=($url[$k+1]<<8);
case 1 : $a+=($url[$k+0]);
/* case 0: nothing left to add */
}
// Final scramble; only the third word is returned.
$mix = mix($a,$b,$c);
/*-------------------------------------------- report the result */
return $mix[2];
}
// Converts a string into an array of integers, one per byte, holding the
// numeric value of each character (as given by ord()).
// Returns an empty array for an empty string.
function strord($string) {
    // Start from an empty array so an empty input still returns an array.
    $result = array();
    $length = strlen($string);
    for ($i = 0; $i < $length; $i++) {
        // Square-bracket offsets; the curly-brace form is no longer accepted by PHP.
        $result[$i] = ord($string[$i]);
    }
    return $result;
}
// Converts an array of 32-bit integers into an array of 8-bit values, four per
// input entry, lowest byte first. Intended as the equivalent of (BYTE *)arr32.
// Returns an empty array when $arr32 is empty.
function c32to8bit($arr32) {
    // Start from an empty array so an empty input still returns an array.
    $arr8 = array();
    $count = count($arr32);
    for ($i = 0; $i < $count; $i++) {
        $word = $arr32[$i];
        for ($byte = 0; $byte < 4; $byte++) {
            // Emit the current low byte, then move the next byte down with a
            // zero-filling shift so the sign bit is not smeared in.
            $arr8[$i * 4 + $byte] = $word & 255;
            $word = zeroFill($word, 8);
        }
    }
    return $arr8;
}
// http://www.example.com/ - Checksum: 6540747202
// The hashed string is the literal prefix "info:" followed by the "url" query parameter.
$url = 'info:'.$_GET['url'];
print("url:\t{$_GET['url']}\n");
// First hash: over the byte values of the prefixed URL.
$ch = GoogleCH(strord($url));
printf("Checksum <2.0.114:\t6%u\n",$ch);
// new since Toolbar 2.0.114
// Fold the first hash: quotient by 7 shifted left two bits, OR-ed with the low
// three bits of the remainder modulo 13.
// NOTE(review): "/" can yield a float and "%" works on integers - verify the
// result for $ch values above 2147483647.
$ch = ((($ch/7) << 2) | (($ch%13)&7));
// Build a 20-entry table: the folded value, then each entry 9 less than the previous one.
$prbuf = array();
$prbuf[0] = $ch;
for($i = 1; $i < 20; $i++) {
$prbuf[$i] = $prbuf[$i-1]-9;
}
// Second hash: the table is split into bytes by c32to8bit() and 80 of them are hashed.
$ch = GoogleCH(c32to8bit($prbuf), 80);
//
printf("Checksum >=2.0.114:\t6%u\n",$ch);
?>
Regards,
traB
alexstapleton 09-15-2004, 07:14 PM I imagine something like the solution used in zeroFill to fix shift right may work.
Working PHP code for new checksum is: :)
<?php
/*
Written and contributed by
Alex Stapleton,
Andy Doctorow,
Tarakan,
Bill Zeller,
Vijay "Cyberax" Bhatter
traB
This code is released into the public domain
*/
header("Content-Type: text/plain; charset=utf-8");
define('GOOGLE_MAGIC', 0xE6359A60);
// Unsigned (zero-filling) right shift of $a by $b bits, for 32-bit values.
//
// PHP's >> keeps the sign, so when bit 31 of $a is set the value is first
// shifted by one, the sign bit is cleared, the bit that moved into position 30
// is put back, and the remaining $b-1 positions are shifted normally.
//
// $a - value to shift
// $b - number of bit positions (0 or more)
// Returns the shifted value.
function zeroFill($a, $b)
{
    // A shift by zero changes nothing. Without this guard the high-bit branch
    // below would shift by ($b - 1) == -1, which is not a valid shift count.
    if ($b == 0) {
        return ($a >> 0);
    }
    // 0x80000000, the bit-31 mask. Passed as a string so hexdec() does not
    // depend on an integer being converted to text first.
    $z = hexdec('80000000');
    if ($z & $a) {
        $a = ($a >> 1);
        $a &= (~$z);
        $a |= 0x40000000;
        $a = ($a >> ($b - 1));
    } else {
        $a = ($a >> $b);
    }
    return $a;
}
// Scrambles the three words $a, $b and $c against each other in three passes.
// Every step subtracts the other two words from one word and then XORs in a
// shifted copy of a neighbour (zero-filling right shift, or plain left shift).
// Returns array($a, $b, $c) with the scrambled words.
function mix($a,$b,$c) {
    // Pass 1
    $a = ($a - $b - $c) ^ zeroFill($c, 13);
    $b = ($b - $c - $a) ^ ($a << 8);
    $c = ($c - $a - $b) ^ zeroFill($b, 13);
    // Pass 2
    $a = ($a - $b - $c) ^ zeroFill($c, 12);
    $b = ($b - $c - $a) ^ ($a << 16);
    $c = ($c - $a - $b) ^ zeroFill($b, 5);
    // Pass 3
    $a = ($a - $b - $c) ^ zeroFill($c, 3);
    $b = ($b - $c - $a) ^ ($a << 10);
    $c = ($c - $a - $b) ^ zeroFill($b, 15);
    return array($a, $b, $c);
}
// Hashes $url, an array of byte values, and returns the third word produced by
// the final mix() call.
// $url    - array of numeric byte values to hash
// $length - number of entries to hash; when null, sizeof($url) is used
// $init   - starting value for $c (defaults to GOOGLE_MAGIC)
function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC) {
if(is_null($length)) {
$length = sizeof($url);
}
// $a and $b start from the same constant; $c starts from the supplied seed.
$a = $b = 0x9E3779B9;
$c = $init;
$k = 0;
$len = $length;
// Consume the input 12 entries at a time: each group of four entries is packed
// lowest-entry-first into one word and added to $a, $b and $c in turn, then
// the three words are scrambled with mix().
while($len >= 12) {
$a += ($url[$k+0] +($url[$k+1]<<8) +($url[$k+2]<<16) +($url[$k+3]<<24));
$b += ($url[$k+4] +($url[$k+5]<<8) +($url[$k+6]<<16) +($url[$k+7]<<24));
$c += ($url[$k+8] +($url[$k+9]<<8) +($url[$k+10]<<16)+($url[$k+11]<<24));
$mix = mix($a,$b,$c);
$a = $mix[0]; $b = $mix[1]; $c = $mix[2];
$k += 12;
$len -= 12;
}
// The total length is added to $c before the leftover entries are handled.
$c += $length;
// $len is now the number of leftover entries (fewer than 12). Entering the
// switch at that count and falling through adds every remaining entry.
switch($len) /* all the case statements fall through */
{
case 11: $c+=($url[$k+10]<<24);
case 10: $c+=($url[$k+9]<<16);
case 9 : $c+=($url[$k+8]<<8);
/* the first byte of c is reserved for the length */
case 8 : $b+=($url[$k+7]<<24);
case 7 : $b+=($url[$k+6]<<16);
case 6 : $b+=($url[$k+5]<<8);
case 5 : $b+=($url[$k+4]);
case 4 : $a+=($url[$k+3]<<24);
case 3 : $a+=($url[$k+2]<<16);
case 2 : $a+=($url[$k+1]<<8);
case 1 : $a+=($url[$k+0]);
/* case 0: nothing left to add */
}
// Final scramble; only the third word is returned.
$mix = mix($a,$b,$c);
/*-------------------------------------------- report the result */
return $mix[2];
}
// Converts a string into an array of integers, one per byte, holding the
// numeric value of each character (as given by ord()).
// Returns an empty array for an empty string.
function strord($string) {
    // Start from an empty array so an empty input still returns an array.
    $result = array();
    $length = strlen($string);
    for ($i = 0; $i < $length; $i++) {
        // Square-bracket offsets; the curly-brace form is no longer accepted by PHP.
        $result[$i] = ord($string[$i]);
    }
    return $result;
}
// Converts an array of 32-bit integers into an array of 8-bit values, four per
// input entry, lowest byte first. Intended as the equivalent of (BYTE *)arr32.
// Returns an empty array when $arr32 is empty.
function c32to8bit($arr32) {
    // Start from an empty array so an empty input still returns an array.
    $arr8 = array();
    $count = count($arr32);
    for ($i = 0; $i < $count; $i++) {
        $word = $arr32[$i];
        for ($byte = 0; $byte < 4; $byte++) {
            // Emit the current low byte, then move the next byte down with a
            // zero-filling shift so the sign bit is not smeared in.
            $arr8[$i * 4 + $byte] = $word & 255;
            $word = zeroFill($word, 8);
        }
    }
    return $arr8;
}
// http://www.example.com/ - Checksum: 6540747202
// The hashed string is the literal prefix "info:" followed by the "url" query parameter.
$url = 'info:'.$_GET['url'];
print("url:\t{$_GET['url']}\n");
// First hash: over the byte values of the prefixed URL.
$ch = GoogleCH(strord($url));
printf("Checksum <2.0.114:\t6%u\n",$ch);
// Re-express the hash as an unsigned decimal string so the arithmetic below
// sees a non-negative value.
$ch=sprintf("%u", $ch);
// new since Toolbar 2.0.114
// Fold the first hash: quotient by 7 shifted left two bits, OR-ed with the low
// three bits of the remainder modulo 13 (taken with fmod() instead of %).
// NOTE(review): $ch/7 is a float here and is converted implicitly by << -
// confirm the conversion on the PHP version in use.
$ch = ((($ch/7) << 2) | (((int)fmod($ch,13))&7));
// Build a 20-entry table: the folded value, then each entry 9 less than the previous one.
$prbuf = array();
$prbuf[0] = $ch;
for($i = 1; $i < 20; $i++) {
$prbuf[$i] = $prbuf[$i-1]-9;
}
// Second hash: the table is split into bytes by c32to8bit() and 80 of them are hashed.
$ch = GoogleCH(c32to8bit($prbuf), 80);
//
printf("Checksum >=2.0.114:\t6%u\n",$ch);
?>
I'll add new code as soon as possible to PR Monitor (http://www.alojamiento-web.org/PRMonitor/) . Both checksums will be checked on this tool and will keep track of any differences between results for the use of old and new checksum.
I imagine something like the solution used in zeroFill to fix shift right may work.
It came to my mind, too.. but since PHP is using 64 bits for handling of floating point operations, I believe that (int)fmod($ch,13) instead of $ch%13 is safe enough.
alexstapleton 09-15-2004, 07:22 PM could you not detect if the 32nd bit is 1 at $ch = ((($ch/7) << 2) | (($ch%13)&7)); and then set it to 0. and restore the CORRECT bit afterwards? you would need to calculate what that bit should be but it should work. i am of course assuming its generating the incorrect BINARY rather than incorect Decimal output at that point of course.
as long as the binary value remains correct it does not matter if it is unsigned or not.
// Truncate the quotient to an integer before shifting; the low three bits of
// the remainder modulo 13 fill the bottom. (Parentheses balanced.)
$ch = ((((int)($ch/7)) << 2) | (($ch%13)&7));
(int) may be needed if float based weirdness happens.
alexstapleton 09-15-2004, 07:23 PM fmod ey, never seen that one before. well done! ill get the new code on my server and hook the PageRank fetcher up too it asap. sleep now
Hehe.. thank you man :D
Usually the answer is simpler than it seems to be.
All the hard work was done by you, gprm, Morptheus, Doctorow... so I only had to take it and... Bingo ! :D
If you find any bug let me know and we'll see what we can do with it
alexstapleton 09-15-2004, 07:45 PM looks like GPRM did most of the hard work on the new version ;)
wish I had more time for silly things like this these days :(
mosert 09-15-2004, 08:13 PM congratulations guys ! level 2 successful. when will begin the level 3 ? :)
Ridolph 09-16-2004, 12:27 AM Hey, has anyone put together a perl version of the (old or new) code, or is that non-working example up above the only one?
Thanks
Ridolph
gemini 09-16-2004, 01:42 PM Hello everyone,
just wanted to say what a great team you are :)
I am actually the guy who passed the original C script by Bob Jenkins to Cyberax (I didn't write it so there are no points for me). Since I'm not a programmer, but was looking for the same script in PHP i have a question:
I know there is a way to check PR through the actual G-toolbar, and it would make more sense (since google changes its algo) - is there a way to do that in PHP? I guess it wouldn't be a problem to do it on ASP.Net ... what do you think about doing it on PHP?
Also, I noticed this:
I have 2.0.113 and 2.0.114 toolbars installed on different computers and
1) 2.0.113 shows PR 4 for http://www.c21myrtlebeach.com (http://c21myrtlebeach.com)
2) 2.0.114 - PR5 for the same site.
I run the script you guys wrote and it gives 2 different checksums as we know, but both XML files shows PR 5 :blink:
Why is that?
Try this tool http://www.top25web.com/pagerank.php
for checking on
http://c21myrtlebeach.com
http://www.c21myrtlebeach.com
and then you can check the same here
http://www.ibsteam.net/gate.html?name=PageRank
I've both checksums and PRs showing from old script and new one as you wrote....
kinda strange :blink:
Thank you very much for all your hard work :)
P.S. Vijay (Cyberax) gave me the link to the forum - thanks Vijay ;).
alexstapleton 09-16-2004, 07:36 PM you could probably write a PHP module to hook into the googlebar.dll but it would be platform dependant (e.g. only run on windows or WINE) which isnt exactly ideal is it.
gemini 09-16-2004, 11:16 PM I think there was another toolbar update since the new checksum, which explains the difference in PR in my previous message. I just installed new toolbar and it is 2.0.114.1 version...
Honestly, I don't believe automatic PR requests can slow down google's servers.. They can make it accessable through API and do similar limitations that they do right now. I don't really see a point of keeping this thing in secret - why than to show it on the toolbar? Just a thought out loud.
Unregistered 09-17-2004, 11:30 AM i expect google monitor this thread, im sure mobile read could check their logs ;)
it does seem a bit silly to do the whole checksum thing, but i imagine they want to give people a reason to use their toolbar. which could be disseminated if peoples source of PR information wasnt restricted.
ZeNiTRaM 09-17-2004, 05:37 PM My script version 0.2 is going to be released soon with an option to choose what checksum version use and some XML parser improvements.
-> http://www.zenitram.info/google-info <-
Kackle 09-22-2004, 05:14 PM Does anyone remember PRMaster?
This was the first checksum crack of PageRank. The cracker made available a Windows download that directly fetched PageRank. He remained anonymous (the download happened through a third-party download site). This was in December 2001.
His code for Windows apparently copied the Google handshaking a bit too closely, because his unique Google cookie ID was embedded in the program! Either that, or it was a setup from the word go. It really seems stupid to leave the cookie ID in the handshake, since it wasn't necessary to get PageRank. How could someone clever enough to crack the checksum be that stupid? I never managed to answer this question.
I alerted people that they should use a hex editor and zero out the cookie ID, or else Google could trace them.
GoogleGuy came back and said, half jokingly, that I had ruined all the fun they were having. GoogleGuy admitted that they tracked down the author of the code and had a friendly chat with him, at which point the author of the code withdrew the program.
I continued to use the program until about May of 2002. At that point I embarrassed myself by discovering this amazing new PageRank update. Sites were shifting dramatically! Finally a friendly person suggested I download the toolbar and check it against PRMaster. Sure enough, it turned out that PRMaster was suddenly, after five months of returning accurate results, showing bogus results that were off by one or two numbers.
If Google is issuing new toolbar updates with easy-to-crack variations on the checksum, I'd be very careful. Google holds all the cards on this, and they're in a position to have more fun with it than you are. They could change the numbers based on the version that gets reported, or the format of the handshaking, and add one or subtract one. They could add one to the PageRank on even hours and subtract one on odd hours. They could make you all look silly quite effortlessly.
If you think I'm making this up, search for doofus prmaster (my username was Doofus when this happened).
Colin Dunstan 09-23-2004, 06:45 AM From what I understand the user would have to upgrade his toolbar each and every time. So I don't think Google could just change their algorithm as often as might they wish.
alien 09-23-2004, 08:08 AM From what I understand the user would have to upgrade his toolbar each and every time. So I don't think Google could just change their algorithm as often as might they wish.
The toolbar will auto update. :)
Colin Dunstan 09-23-2004, 08:30 AM I think there are a lot of people against any form of auto-updates. So I don't think Google would be making friends if it was enforcing auto-updates for something as trivial as pagerank queries.
Kackle 09-23-2004, 09:30 AM A danger that's greater than the danger of frequent toolbar checksum updates that keep you all busy with new code, is the danger that anything grabbing PageRank except from the official toolbar is likely to leave fingerprints at the Googleplex. With Google's habit of saving everything, this could contribute to a database of suspected spammers.
For example, the GPRM, v.1.1, a desktop Windows PageRank grabber, sends a user-agent of "IE5.vbx" to Google when it fetches PageRank from my box. This is a fingerprint that probably comes from some DLL on my Windows box, and the coder might not even have access to this, assuming that the DLL has to be used for contacting www.google.com.
It's also easy to compile a console program and run it under DOS32, and have it use curl or wget. You can set your own user-agent that way. But then the temptation is to use a batch file. Any IP address coming into Google that grabs PageRank faster than someone can key the domains into the toolbar is automatically suspicious.
There are a number of sites that grab PageRank for you. Certainly Google knows the IP addresses or handshaking characteristics of these sites by now.
Once you identify a PageRank grab as a grab that originated from outside the toolbar, then you save the domains requested in a separate database. Sort the domains by how many times per week they were requested, and you have a pretty nice list of who the SEO spammers might be. Assign some temp help to check them out manually, one by one, and see if any dubious techniques might be used. Ban them.
Colin Dunstan 09-23-2004, 09:36 AM Well Google cannot do that either, because it wouldn't be my fault if someone put the url of my page in one of those pagerank grabbers. Google would penalize the wrong person.
BigTree 09-23-2004, 03:48 PM Sorry for my lack of technical understanding on this topic, but:
If I use an automated PageRank checker that uses the old checksum, can Google see that these are non-toolbar queries? (assuming that there's nothing else unusual, like the frequency of requests)
I'm just not clear on whether Google changed the format of the toolbar request (in which case I presume they could detect me, if all toolbars have auto-updated to the new format, but my checker has not), or have changed the format of the results sent back.
Thanks!
doctorow 09-24-2004, 04:18 AM If the checker is implemented correctly, and if the IP address where the checker is located is not "black-listed" by Google, there is no way for Google to figure out that these are non-toolbar queries.
Unregistered 09-24-2004, 11:49 AM If the checker is implemented correctly, and if the IP address where the checker is located is not "black-listed" by Google, there is no way for Google to figure out that these are non-toolbar queries.
Doc,
Even if the PR checker doesn't yet use the new checksum? That's what I'm not clear on....
Thanks!!
Kackle 09-24-2004, 02:03 PM I haven't checked in the last year or so, but when I had the toolbar on a sniffer back then, it was clear that the dialogue between the toolbar and Google was much more extensive than the dialogue offered by all the checker code I've seen on these forums.
For one thing, the checkers don't offer or accept a Google cookie. It's true that someone could have Google cookies blocked on Explorer and the toolbar would most likely behave the same way, but it's also true that blocked cookies are relatively rare. That's a clue right there, if Google is looking for clues.
I think it is fair to say that the PageRank hackers have not made an effort to design their checkers to fly under Google's radar. All they're doing is having fun with reverse engineering and hacking.
Another question is whether the PageRank value is even particularly meaningful these days. It was up until April 2003, but in the last 17 months Google's algorithms have depended less on PageRank, and more on anchor text in the links.
doctorow 09-25-2004, 06:09 AM Doc,
Even if the PR checker doesn't yet use the new checksum? That's what I'm not clear on....
Thanks!!Yep, since there are still lots of people who have not upgraded to the latest toolbar. How should google differentiate between those running an older toolbar and those running an older PR checker with the older checksum?
doctorow 09-25-2004, 06:10 AM Totally agree with you in everything you said.
I haven't checked in the last year or so, but when I had the toolbar on a sniffer back then, it was clear that the dialogue between the toolbar and Google was much more extensive than the dialogue offered by all the checker code I've seen on these forums.
For one thing, the checkers don't offer or accept a Google cookie. It's true that someone could have Google cookies blocked on Explorer and the toolbar would most likely behave the same way, but it's also true that blocked cookies are relatively rare. That's a clue right there, if Google is looking for clues.
I think it is fair to say that the PageRank hackers have not made an effort to design their checkers to fly under Google's radar. All they're doing is having fun with reverse engineering and hacking.
Another question is whether the PageRank value is even particularly meaningful these days. It was up until April 2003, but in the last 17 months Google's algorithms have depended less on PageRank, and more on anchor text in the links.
funzie 09-25-2004, 01:14 PM For one thing, the checkers don't offer or accept a Google cookie. It's true that someone could have Google cookies blocked on Explorer and the toolbar would most likely behave the same way, but it's also true that blocked cookies are relatively rare. That's a clue right there, if Google is looking for clues.
Which is why I am waiting for a Perl port. You can use LWP to accept the cookies and create your own user agent, that way it will make it just one more step harder for google to track you. I guess i'll have to learn PHP, ive already made the LWP script, all I need is the checksum calculator. I have done it by having the Perl script run the PHP checksum and then getting the checksum from the PHP script and then getting the PR from google. The only problem is that its terribly slow since to access the PHP script it needs to access it using an HTTP request.
dharana 09-25-2004, 04:09 PM I am investigating why but the same script that works (traB's latest) with php 5.0.1 breaks with php 5.0.2.
doctorow 09-26-2004, 10:29 AM Google Toolbar 2.0.114.5 has been released. I didn't bother downloading it, but did anyone notice any difference in the pagerank algorithm again?
Unregistered 09-26-2004, 11:31 AM Yep, since there are still lots of people who have not upgraded to the latest toolbar. How should google differentiate between those running an older toolbar and those running an older PR checker with the older checksum?
OK, but I thought that, beacuse the toolbar auto-updates itself, there would quickly be few toolbars with the old checksum. Correct?
Hi Guys
Have a look at this string
/search?client=navclient-auto&googleip=O;875&ch=61598694189&freshness_check=45x6esDcU2KZ49zX6SJ31&ie=UTF-8&oe=UTF-8&features=Rank&q=info:http%3A%2F%2Fcommunity%2Ewebshots%2Ecom%2Fphoto%2F189573646%2F189573646tQkGwb
It uses something like a freshness check
any clue's
doctorow 09-27-2004, 05:10 AM Where is that from, Raj? Are you using the updated Google Toolbar that was just released (2.0.114.5)?
doctorow 09-27-2004, 05:12 AM Btw, freshness_check was used a long time ago in older versions of the toolbar, but then disappeared.
Unregistered 09-27-2004, 06:50 PM OK, but I thought that, beacuse the toolbar auto-updates itself, there would quickly be few toolbars with the old checksum. Correct?
Doc or someone else, can you field the above question? I appreciate your help!
doctorow 09-28-2004, 03:48 AM I am not sure since I am not using the original toolbar... but are you sure it is always auto-updating itself? Is there no switch or something where you can disable it? I would be rather upset if any kind of software auto-updates itself on my computer without the option to turn it off.
Unregistered 09-28-2004, 09:54 AM i was under the impression you couldnt turn it off my self
Unregistered 09-28-2004, 10:03 AM There's no reason why the PR lookup can't be 100% undetectable on Google's end. It's true not everything has been done to fly under Google's radar yet, and there's only one real way to guarantee that you're not detected, and that's a distributed PR lookup system that hooks into the GoogleToolbar and reports back to a central server the PRs of requested URLs.
That way even when google update the toolbar, you get the right result. And its easy to automate because requests can be made to a central server which then requests the PR of a URL from all the participating nodes. The latency of such a request might be a tad high, but it shouldnt be too bad. Of course if the system got used too much and there where not enough seperate nodes, google could work out what all the nodes where and block them. Simply limiting the number of requests based on the node count would stop that though.
In fact, this idea seems reasonbly plausible if we can work out a way to hook into the toolbar its self.
Unregistered 09-28-2004, 11:31 AM I am not sure since I am not using the original toolbar... but are you sure it is always auto-updating itself? Is there no switch or something where you can disable it? I would be rather upset if any kind of software auto-updates itself on my computer without the option to turn it off.
This is why I said what I did: This thread discusses the controversy about the toolbar's self-updating functionality.
http://www.webmasterworld.com/forum80/457.htm
So, my conclusion is that toolbars using the old checksum might even already be largely retired. Any other thoughts?
Unregistered 09-29-2004, 05:42 PM Sure it would be. You can actually compile the assembler stub above as an .obj and then easily link it in any c application (I've been doing this already, actually).
But it would be much more interesting to have above's code in .php, allowing for various web applications!
Doctorow, will you be willing to share more details on linking to C code as you did ? Say, the assembler to use, the header and C function signiture and how the check sum value is returned from the function etc.
Thanks and good work with open source.
Unregistered 10-19-2004, 06:57 PM I am investigating why but the same script that works (traB's latest) with php 5.0.1 breaks with php 5.0.2.
Have you figured it out yet? I'm having the issue too. The checksum value is invalid as of 5.0.2.
Unregistered 10-19-2004, 10:37 PM My PageRank Calculator gets back.. but with a new name and new features.
Now it's GoogleGetInfo and gets PageRank and 10 parameters more.
Check it at http://dhost.info/zeni/google-info
It gets all the parameters from Google in XML and returns them in an array with those parameters:
//0: URL
//1: Title
//2: PageRank. The Most Wanted!
//3: Last Crawled Date. Syntax: " 31 Aug 2004". Note the first space, use trim() to remove it.
//4: Page Text that appears in Google ;)
//5: Language of the page (2 letters)
//6: URL in the Directory. Get to http://www.google.com/<this> to see it.
//7: Title in the Directory.
//8: Description in the Directory.
//9: File Size. Ex: "59k"
//10: Looks like a checksum of the page, not sure
Read googlegetinfo.php comments for info.
How exactly would one use this on their site, I am not exactly the best at php..I have downloaded and installed in my server, but I cannot get it to work..Would someone please help me on this....
Maybe I just need the right query string...
Thanks
Unregistered 10-24-2004, 06:58 PM Hi all
Does anyone by any chance have this code in Perl ?
Many thanks
Idan
Unregistered 10-28-2004, 06:30 PM For those of you trying to do it in PHP, it's working for me.
http://www.precisionseo.com/seo_tools/google_pr
Let me know if you have questions. In Perl? I will look into it. Shouldn't be that hard.
-S
fourthring 10-28-2004, 06:40 PM Sorry that was me that just posted the link for the PR checker in PHP. IF anyone wants the entire source code , please message me. I didn't do much just added a few lines to the original posting. Not claiming any credit for anything.
-FR
Unregistered 10-29-2004, 02:35 PM I would like to know if the php-scripts posted represent a violation of Google's Terms of Use.
Unregistered 11-03-2004, 07:13 PM it does, any script that you see on any website that gets the google pagerank violates the TOS.
Unregistered 11-04-2004, 03:54 AM Which is probably why you don't see a demo of this script running here on mobileread.com.
Unregistered 11-16-2004, 12:48 AM I finnally got bored and spent all day sunday porting it to perl.
Jones 11-16-2004, 04:17 AM I finnally got bored and spent all day sunday porting it to perl.
You got a link?
funzie 11-16-2004, 05:55 PM yes I do, http://www.yourcgi.com/checksum.cgi
Unregistered 11-25-2004, 03:51 PM Hi all,
i use the code founded here http://www.mobileread.com/forums/showpost.php?p=9738&postcount=186
Code worked well until i update my php to 5.02 version.
Anyone with same problem ?
for exemple http://www.myserver.com/thephpcode.php?url=http://www.example.com/
give me
url: http://www.example.com/
Checksum <2.0.114: 62147402699
Checksum >=2.0.114: 62147378985
So it should gave me somethings like
URL .... http://www.exemple.com/
Checksum <2.0.114: ..... 63090365271
Checksum >=2.0.114: ..... 64144368538
Any idea ?
Thanks in advance.
TT.
Unregistered 11-26-2004, 05:55 AM i ve reput my last php version, and now its work.
php 4.3.9 works, php 4.3.10RC1 not working.
doctorow 12-02-2004, 08:23 AM Did we kill PageRank? Just found this note (http://forums.searchenginewatch.com/showthread.php?t=3054), supposedly from Google:
"The PageRank that is displayed in the Google Toolbar is for entertainment purposes only. Due to repeated attempts by hackers to access this data, Google updates the PageRank data very infrequently because is it not secure. On average, the PR that is displayed in the Google Toolbar is several months old. If the toolbar is showing a PR of zero, this is because the user is visiting a new URL that hasn't been updated in the last update. The PR that is displayed by the Google Toolbar is not the same PR that is used to rank the webpage results so there is no need to be concerned if your PR is displayed as zero. If a site is showing up in the search results, it doesn't not have a real PR of zero, the Toolbar is just out of date"
Cough cough.
Unregistered 12-02-2004, 07:47 PM We just upgraded to php5 and getting a 403 forbidden. Anyone with a new checksum for this?
Unregistered 12-13-2004, 03:05 PM yes I do, http://www.yourcgi.com/checksum.cgi
How about a link to the Perl source?
Zealot 05-28-2005, 08:07 AM With almost 30'000 page views this thread seems quite popular. I should warn you now...
The PageRank is dead!
Yup. Just try any of your pagerank tools, including the official Google toolbar, and you'll see.... pagerank for any site you try is 0!!
Perhaps the code posted here was never meant to become public.
Unregistered 06-09-2005, 03:32 PM Is there a Perl version of this available?
Thanks!
Unregistered 06-10-2005, 10:02 AM Geee...I'm doing a vb .net project..How I wish that someone can help me by converting the php code to vb .net..or correct this one: http://tumanov.com/projects/scriptlets/googlepagerankchecksum.asp
thanx!
Unregistered 06-18-2005, 10:38 PM hell yeah well i got it working
http://atopqualitysite.com/addurl.htm
martinx 06-30-2005, 05:31 AM :blink:
Hi there , when i call the following url in asp
url = "http://www.google.com/search?client=navclient-auto&ch=61482383574&features=Rank&q=info:http://www.msn.com"
set xmlhttp = CreateObject("MSXML2.ServerXMLHTTP")
xmlhttp.open "GET", url, false
xmlhttp.send ""
Response.write xmlhttp.responseText
set xmlhttp = nothing
iam get always the following error from google
Forbidden
Your client does not have permission to get URL /search?client=navclient-auto&ch=61482383574&features=Rank&q=info:http://www.msn.com from this server. (Client IP address: xxx.xxx.xxx.xxx)
Please see Google's Terms of Service posted at http://www.google.com/terms_of_service.html
Can anyone help me ?
Greetings
Unregistered 07-06-2005, 06:30 AM Looks like they have updated it again. I imagine no version of this hack now works.
(Alex)
Tommy 07-08-2005, 08:16 PM Now that Google didn't bother to release a version of its toolbar for Firefox, you can actually parse the official code which looks surprisingly similar to the one reverse-engineered in this forum ;) Only look how the seed is now calculated... looks like someone was referring to us here, heheheheee:
from pagerank.js:
// Seed string; GPR_awesomeHash XORs its characters (cycled by index) into the hash state.
var GPR_HASH_SEED = "Mining PageRank is AGAINST GOOGLE'S TERMS OF SERVICE. Yes, I'm talking to you, scammer.";
// hash is the literal "8" followed by the hex string returned by GPR_awesomeHash(page).
// NOTE(review): `page` is not defined in this excerpt - presumably the URL being looked up; confirm in pagerank.js.
var hash = "8" + GPR_awesomeHash(page);
// Hashes the string `value` into a 32-bit state and returns it via GPR_hexEncodeU32.
// For every character: XOR the state with (seed char at i modulo seed length) XOR (value char at i),
// then rotate the 32-bit state left by 9 bits.
// Fix: the identifier in the return statement had a stray space in it (forum line-wrapping
// artifact), which made the function a syntax error.
function GPR_awesomeHash(value) {
  var kindOfThingAnIdiotWouldHaveOnHisLuggage = 16909125;
  for (var i = 0; i < value.length; i++) {
    kindOfThingAnIdiotWouldHaveOnHisLuggage ^= GPR_HASH_SEED.charCodeAt(i % GPR_HASH_SEED.length) ^ value.charCodeAt(i);
    // rotate left by 9: the top 9 bits wrap around to the bottom
    kindOfThingAnIdiotWouldHaveOnHisLuggage = kindOfThingAnIdiotWouldHaveOnHisLuggage >>> 23 | kindOfThingAnIdiotWouldHaveOnHisLuggage << 9;
  }
  return GPR_hexEncodeU32(kindOfThingAnIdiotWouldHaveOnHisLuggage);
}
// Encodes the low 32 bits of `num` as four bytes, most significant first,
// each rendered by GPR_toHex8, and returns the concatenated string.
function GPR_hexEncodeU32(num) {
  var encoded = "";
  for (var shift = 24; shift >= 0; shift -= 8) {
    encoded += GPR_toHex8((num >>> shift) & 255);
  }
  return encoded;
}
// Returns `num` in lowercase hexadecimal, prefixed with "0" when num is below 16.
function GPR_toHex8(num) {
  var digits = num.toString(16);
  if (num < 16) {
    digits = "0" + digits;
  }
  return digits;
}
AboutSledge 09-17-2005, 04:39 AM Google PageRank Checksum Algorithm(GoogleToolbar 3.0.125.1-big)
http://www.gamesaga.net/pagerank.php
C and PHP implementation
http://ww.gamesaga.net/pr.zip
http://ww.gamesaga.net/pr.tar.gz
AboutSledge 09-17-2005, 04:44 AM :blink:
Hi there , when i call the following url in asp
' Request the toolbar PageRank response for msn.com from Google and echo the raw body to the client.
url = "http://www.google.com/search?client=navclient-auto&ch=61482383574&features=Rank&q=info:http://www.msn.com"
Set httpClient = CreateObject("MSXML2.ServerXMLHTTP")
With httpClient
    .Open "GET", url, False    ' third argument False: the call is made synchronously
    .Send ""
    Response.Write .responseText
End With
Set httpClient = Nothing
I always get the following error from Google:
Forbidden
Your client does not have permission to get URL /search?client=navclient-auto&ch=61482383574&features=Rank&q=info:http://www.msn.com from this server. (Client IP address: xxx.xxx.xxx.xxx)
Please see Google's Terms of Service posted at http://www.google.com/terms_of_service.html
Can anyone help me ?
Greetings
checksum error
ch=723887121954
http://www.google.com/search?client=navclient-auto&features=Rank:&q=info:http://www.msn.com&ch=723887121954
Google Toolbar 3.0.125.1-big
http://www.gamesaga.net/pagerank.php?
AboutSledge 09-17-2005, 05:01 AM Hi all,
I use the code found here http://www.mobileread.com/forums/showpost.php?p=9738&postcount=186
The code worked well until I updated my PHP to version 5.02.
Anyone with the same problem?
For example, http://www.myserver.com/thephpcode.php?url=http://www.example.com/
gives me
url: http://www.example.com/
Checksum <2.0.114: 62147402699
Checksum >=2.0.114: 62147378985
But it should give me something like
URL .... http://www.exemple.com/
Checksum <2.0.114: ..... 63090365271
Checksum >=2.0.114: ..... 64144368538
Any idea ?
Thanks in advance.
TT.
PHP Data Type Converting Bug.
<?php
// Dumps one number that lies below the signed 32-bit minimum, then dumps the result of
// converting it to int in three ways: intval(), an (int) cast, and settype().
//header("Content-Type: text/plain; charset=utf-8");
$value = -6288256054;
var_dump($value);

// 1) intval()
$viaIntval = intval($value);
var_dump($viaIntval);

// 2) explicit cast
$viaCast = (int) $value;
var_dump($viaCast);

// 3) settype() converts $value itself in place
settype($value, "int");
var_dump($value);
?>
============good==========
float(-6288256054)
int(-1993288758)
int(-1993288758)
int(-1993288758)
============bad=================
float(-6288256054)
int(-2147483648)
int(-2147483648)
int(-2147483648)
Unregistered 09-26-2005, 09:37 PM Hi,
I have the exact same problem. Did you find a workaround?
|