87e9b76ac6
Hotmail website. ok jasper
302 lines
14 KiB
Plaintext
302 lines
14 KiB
Plaintext
Diff from GetLive CVS to incorporate Hotmail site changes.
|
|
|
|
$OpenBSD: patch-GetLive_pl,v 1.5 2009/03/07 15:22:51 merdely Exp $
|
|
--- GetLive.pl.orig Sat Jul 5 16:27:36 2008
|
|
+++ GetLive.pl Fri Mar 6 23:56:19 2009
|
|
@@ -65,11 +65,12 @@ my $Password = "";
|
|
my $Domain = 'hotmail.com';
|
|
my $CurlCommand = 'curl -k';
|
|
my $Verbosity = 1; # 0:Silent; 1:Normal; 2:Verbose; 10:debug; 100:heavy debug
|
|
-my $MailProcessor = '/usr/bin/procmail'; # Any program taking mbox formatted at stdin will do.
|
|
+my $MailProcessor = '${LOCALBASE}/bin/procmail'; # Any program taking mbox formatted at stdin will do.
|
|
my $DownloadedIdsFile = ""; # Local file with Ids of already downloaded messages.
|
|
my $RetryLimit = 2;
|
|
my $MarkRead = "No"; # No,Yes : But never when downloaded before !
|
|
my $Delete = "No"; # No,Yes : But never when downloaded before !
|
|
+my $SkipTrash = "No"; # No,Yes : Do not handle the Trash folder
|
|
my $FetchOnlyUnread = "No"; # If Yes, only messages marked unread are downloaded.
|
|
# Unlike gotmail, this is completely orthogonal to the
|
|
# DownloadedIdsFile, i.e. it is the one or the other.
|
|
@@ -78,6 +79,8 @@ my $MoveToFolder = ""; #
|
|
# contains the folder to move to. This is a hook for
|
|
# autoclassifying the mail on the server, including spam filtering.
|
|
|
|
+my $Mode = "200809";
|
|
+
|
|
# Quirk. MS generates unended <pre> tags. Hope this is a temporary weakness in their mind.
|
|
# (Hope makes living, we say in Dutch ...)
|
|
# If we want to allow it set this to one. Currently as of 8/9/2007 it must be accepted or the
|
|
@@ -275,7 +278,7 @@ sub ParseConfig {
|
|
} elsif ($Option =~ m/^Password$/i) {
|
|
$Password = $OptionValue;
|
|
} elsif ($Option =~ m/^Mode$/i) {
|
|
- warn "\n'Mode = ...' in the config file is ignored.\nThis version works only for 'Live' mailboxes !\n";
|
|
+ $Mode = $OptionValue;
|
|
} elsif ($Option =~ m/^Domain$/i) {
|
|
$Domain = $OptionValue;
|
|
} elsif ($Option =~ m/^Proxy$/i) {
|
|
@@ -298,6 +301,8 @@ sub ParseConfig {
|
|
$MarkRead = $OptionValue;
|
|
} elsif ($Option =~ m/^Delete$/i) {
|
|
$Delete = $OptionValue;
|
|
+ } elsif ($Option =~ m/^SkipTrash$/i) {
|
|
+ $SkipTrash = $OptionValue;
|
|
} elsif ($Option =~ m/^MoveToFolder$/i) {
|
|
$MoveToFolder = $OptionValue;
|
|
} else {
|
|
@@ -332,6 +337,10 @@ sub ParseConfig {
|
|
Display("MarkRead should take No or Yes as argument in the configuration file.\n",stderr=>1);
|
|
DisplayUsageAndExit();
|
|
}
|
|
+ if ($SkipTrash !~ m/^(No|Yes)$/i) {
|
|
+ Display("SkipTrash should take No or Yes as argument in the configuration file.\n",stderr=>1);
|
|
+ DisplayUsageAndExit();
|
|
+ }
|
|
if ($Delete !~ m/^(No|Yes)$/i) {
|
|
Display("Delete should take No or Yes as argument in the configuration file.\n",stderr=>1);
|
|
DisplayUsageAndExit();
|
|
@@ -340,6 +349,10 @@ sub ParseConfig {
|
|
Display("Delete must be 'No' when MoveToFolder is also specified in the configuration file.\n",stderr=>1);
|
|
DisplayUsageAndExit();
|
|
}
|
|
+ if ($Mode !~ m/^(200809|200810|200902)$/i) {
|
|
+ Display("Mode must be oneof 200809,200810,200902 as argument in the configuration file.\n",stderr=>1);
|
|
+ DisplayUsageAndExit();
|
|
+ }
|
|
}
|
|
|
|
########################################################################################################################
|
|
@@ -535,7 +548,7 @@ sub Login() {
|
|
|
|
Display("Getting hotmail index loginpage.\n", MinVerbosity =>2);
|
|
|
|
- my ($LoginPageAsString,$GetPageUrl) = GetPage(Url => "http://mail.live.com",FollowForward => 1);
|
|
+ my ($LoginPageAsString,$GetPageUrl) = GetPage(Url => "http://mail.live.com/",FollowForward => 1);
|
|
|
|
# We expect here a number of functions now (aug 2007) to be hidden in a javascript
|
|
# that is loaded separately. Let's load and append.
|
|
@@ -690,7 +703,9 @@ REENTRY_DARREN_QUIRK:
|
|
|
|
Display("LoginUrl 2 : '$LoginUrl'.\n",MinVerbosity => 10);
|
|
# Following the redirect : Third step of login.
|
|
- Display("Following redirect.\n",MinVerbosity => 2);
|
|
+ Display("Following redirect.\n",MinVerbosity => 2);
|
|
+
|
|
+LABEL_20090214:
|
|
($LoginPageAsString,$GetPageUrl) = GetPage(Url => $LoginUrl,FollowForward => 1);
|
|
|
|
$LoginUrl = $GetPageUrl;
|
|
@@ -716,6 +731,19 @@ REENTRY_DARREN_QUIRK:
|
|
} elsif ($LoginPageAsString =~ m/MSNPlatform\/browsercompat.js/) {
|
|
$LoggedIn = 1;
|
|
}
|
|
+
|
|
+ # Since 14/2/2009 we are not yet logged in at this point: follow the first src="..." URL found in the page and check again.
|
|
+
|
|
+ my $Tried20090214 = 0;
|
|
+ if (!$LoggedIn && !$Tried20090214) {
|
|
+ if ($LoginPageAsString !~ m/src=\"([^\"]*)\"/) {
|
|
+ die "Could not log in. Maybe structure has changed or was not foreseen.";
|
|
+ }
|
|
+ $LoginUrl = HtmlUnescape($1);
|
|
+ Display("Following link '$LoginUrl'.\n",MinVerbosity=>2);
|
|
+ $Tried20090214 = 1;
|
|
+ goto LABEL_20090214
|
|
+ }
|
|
|
|
die "Could not log in. Maybe structure has changes or was not foreseen." unless $LoggedIn;
|
|
|
|
@@ -960,7 +988,7 @@ sub GetMessagesFromFolder($) {
|
|
"au=$AuthUser&".
|
|
"ptid=0";
|
|
my $PostData = "";
|
|
- if ($Page == 1) {
|
|
+ if ($Mode =~ m/200809/ && $Page == 1) {
|
|
$PostData = "cn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox".
|
|
"&".
|
|
"mn=GetInboxData".
|
|
@@ -971,7 +999,7 @@ sub GetMessagesFromFolder($) {
|
|
"v=1".
|
|
"&".
|
|
"mt=$MT";
|
|
- } else {
|
|
+ } elsif ($Mode =~ m/200809/) {
|
|
$PostData = "cn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox".
|
|
"&".
|
|
"mn=GetInboxData".
|
|
@@ -979,11 +1007,37 @@ sub GetMessagesFromFolder($) {
|
|
"d=true,true,{".uri_escape("\"$FolderId\"").",25,NextPage,0,Date,false,".
|
|
uri_escape("\"$pnAm\"") . "," .
|
|
uri_escape("\"$pnAd\"") . "," .
|
|
- "$Page,2,false,null,false,$ReportedNrMessages},false,null".
|
|
+ "$Page,2,false,null,false,$ReportedNrMessages},false,null".
|
|
"&".
|
|
"v=1".
|
|
"&".
|
|
"mt=$MT";
|
|
+ } elsif ($Mode =~ m/(200810|200902)/ && $Page == 1) {
|
|
+ $PostData = "cn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox".
|
|
+ "&".
|
|
+ "mn=GetInboxData".
|
|
+ "&".
|
|
+ "d=true,true,{".uri_escape("\"$FolderId\"").",0,0,Date,false,".
|
|
+ uri_escape("\"$pnAm\"") . "," .
|
|
+ uri_escape("\"$pnAd\"") . "," .
|
|
+ "$Page,2,false,%22%22,0,-1,Off},false,null".
|
|
+ "&".
|
|
+ "v=1".
|
|
+ "&".
|
|
+ "mt=$MT";
|
|
+ } elsif ($Mode =~ m/(200810|200902)/) {
|
|
+ $PostData = "cn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox".
|
|
+ "&".
|
|
+ "mn=GetInboxData".
|
|
+ "&".
|
|
+ "d=true,true,{".uri_escape("\"$FolderId\"").",NextPage,0,Date,false,".
|
|
+ uri_escape("\"$pnAm\"") . "," .
|
|
+ uri_escape("\"$pnAd\"") . "," .
|
|
+ "$Page,2,false,%22%22,$ReportedNrMessages,-1,Off},false,null".
|
|
+ "&".
|
|
+ "v=1".
|
|
+ "&".
|
|
+ "mt=$MT";
|
|
}
|
|
|
|
($PageAsString,$GetPageUrl) = GetPage(Url => $Url,CurlDataArg => $PostData);
|
|
@@ -1003,8 +1057,9 @@ sub GetMessagesFromFolder($) {
|
|
|
|
# get the ID for the trash folder
|
|
$TrashFolderId = $PageAsString;
|
|
- $TrashFolderId =~ m/sysFldrs\s*?:\s*?{\s*?trashFid\s*?:\s*?\"(.*?)\".*?}/si;
|
|
+ $TrashFolderId =~ m/sysFldrs\s*?:\s*?{.*?trashFid\s*?:\s*?\"(.*?)\".*?}/si;
|
|
$TrashFolderId = $1;
|
|
+ die "Could not find TrashFolderId." unless $TrashFolderId;
|
|
|
|
# get the session variables as well as the request handler
|
|
$PageAsString =~ m/fppCfg\s*?:\s*?{\s*?RequestHandler\s*?:\s*?\"(.*?)\".*?SessionId\s*?:\s*?\"(.*?)\".*?AuthUser\s*?:\s*?\"(.*?)\".*?}/si;
|
|
@@ -1016,9 +1071,12 @@ sub GetMessagesFromFolder($) {
|
|
|
|
# To start with we limit us to a MessagesArea between
|
|
# <table class="dItemListContentTable"..> ... </table..>
|
|
- if ($PageAsString !~ m/<table class=\"d?ItemListContentTable[^>]*>(.*?)<\/table/si) {
|
|
+ if ($Mode =~ m/200809/ && $PageAsString !~ m/<table class=\"d?ItemListContentTable[^>]*>(.*?)<\/table/si) {
|
|
die "Could not correctly parse the messages table.";
|
|
}
|
|
+ if ($Mode =~ m/(200810|200902)/ && $PageAsString !~ m/<table class=\"d?InboxTable[^>]*>(.*?)<\/table/si) {
|
|
+ die "Could not correctly parse the messages table.";
|
|
+ }
|
|
my $MessagesArea = $1;
|
|
|
|
# In this message area there's the body of the table containing messages.
|
|
@@ -1046,24 +1104,44 @@ sub GetMessagesFromFolder($) {
|
|
my $MessageId = $1;
|
|
my $MessageAd = $2;
|
|
|
|
- # Goto 5th column.(to get the from)
|
|
+ # Go to the column that holds the sender ("from") information.
|
|
my $TdLine = "";
|
|
- for (my $Idx=0;$Idx<4;$Idx++) {
|
|
+ my $Column = 5;
|
|
+ if ($Mode =~ m/(200810|200902)/) { $Column = 4; }
|
|
+ for (my $Idx=0;$Idx<($Column-1);$Idx++) {
|
|
$MessagesArea =~ m/<td(.*?)>(.*?)<\/td\s*>/i;
|
|
$MessagesArea = $';
|
|
$TdLine = $2;
|
|
}
|
|
- if ($TdLine !~ m/class=\"(truncate)?from\">(<[^>]+>)?(.*?)<[^>]+>/si) {
|
|
- die "Parse error for 'from'.";
|
|
+ my $From = "";
|
|
+ if ($Mode =~ /200809/) {
|
|
+ if ($TdLine !~ m/class=\"(truncate)?from\">(<[^>]+>)?(.*?)<[^>]+>/si) {
|
|
+ die "Parse error for 'from'.";
|
|
+ }
|
|
+ $From = HtmlUnescape($3);
|
|
+ } elsif ($Mode =~ /(200810|200902)/ ) {
|
|
+ if ($TdLine !~ m/(<[^>]+>)?(.*?)<[^>]+>/si) {
|
|
+ die "Parse error for 'from'.";
|
|
+ }
|
|
+ $From = HtmlUnescape($2);
|
|
}
|
|
- my $From = HtmlUnescape($3);
|
|
Display("From '$From'.\n",MinVerbosity => 10);
|
|
$MessagesFrom[$NrMessagesDetected] = $From;
|
|
|
|
# Further to the subject column. There we pick up also the href of the message.
|
|
- $MessagesArea =~ m/<td(.*?)>(.*?)<\/td\s*>/i;
|
|
- $MessagesArea = $';
|
|
- $TdLine = $2;
|
|
+ if ($Mode =~ /200809/) {
|
|
+ $MessagesArea =~ m/<td(.*?)>(.*?)<\/td\s*>/i;
|
|
+ $MessagesArea = $';
|
|
+ $TdLine = $2;
|
|
+ } elsif ($Mode =~ /200810/) {
|
|
+ $MessagesArea =~ m/<td class=\"SubjectCol\">(.*?)<\/td\s*>/i;
|
|
+ $MessagesArea = $';
|
|
+ $TdLine = $1;
|
|
+ } elsif ($Mode =~ /200902/) {
|
|
+ $MessagesArea =~ m/<td class=\"Sbj\">(.*?)<\/td\s*>/i;
|
|
+ $MessagesArea = $';
|
|
+ $TdLine = $1;
|
|
+ }
|
|
if ($TdLine !~ m/<a href=\"(.*?)\"\s*>(.*?)<\/a>/si) {
|
|
die "Parse error for 'subject'.";
|
|
}
|
|
@@ -1091,11 +1169,19 @@ sub GetMessagesFromFolder($) {
|
|
Display("Search for one more page.\n",MinVerbosity => 10);
|
|
# Search for 'next page' href
|
|
my $NextPageAd = "";
|
|
- if ($PageAsString =~
|
|
- m/<li([^>]*)>\s*<a href=\"([^\"]+)\"[^>]*><img src=\"[^\"]*\" class=\"i_nextpage\".*?><\/a>/si) {
|
|
- $NextPageAd = $1;
|
|
+ if ($Mode =~ m/200809/) {
|
|
+ if ($PageAsString =~
|
|
+ m/<li([^>]*)>\s*<a href=\"([^\"]+)\"[^>]*><img src=\"[^\"]*\" class=\"i_nextpage\".*?><\/a>/si) {
|
|
+ $NextPageAd = $1;
|
|
+ }
|
|
+ die "Could not find an expected next page href. Probably page structure changed." unless $NextPageAd;
|
|
+ } elsif ($Mode =~ m/(200810|200902)/) {
|
|
+ if ($PageAsString =~
|
|
+ m/<li([^>]*)>\s*<a href=\"([^\"]+)\"[^>]*><img class=\"PageNavigationNext\".*?><\/a>/si) {
|
|
+ $NextPageAd = $1;
|
|
+ }
|
|
+ die "Could not find an expected next page href. Probably page structure changed." unless $NextPageAd;
|
|
}
|
|
- die "Could not find an expected next page href. Probably page structure changed." unless $NextPageAd;
|
|
|
|
if ($NextPageAd !~ m /pnAm=\"([^\"]*)/) {
|
|
die "Could not find pnAm in '$NextPageAd'.";
|
|
@@ -1111,7 +1197,7 @@ sub GetMessagesFromFolder($) {
|
|
Display("Next page Ad : '$pnAd'.\n",MinVerbosity => 10);
|
|
Display("Next page Am : '$pnAm'.\n",MinVerbosity => 10);
|
|
}
|
|
- }
|
|
+ }
|
|
}
|
|
|
|
########################################################################################################################
|
|
@@ -1221,10 +1307,10 @@ sub GetFolders() {
|
|
# Scan the line for all folders, their href and title.
|
|
# NrFolders on the fly;
|
|
while ($FolderPageAsString =~
|
|
- m/<td class=\"d?ManageFoldersFolderNameCol\"><a\s*href=\"([^\"]*)\"\s*>(.*?)<\/a>\s*<\/td>\s*<td class=\"d?ManageFoldersTotalCountCol[^\"]*\">(\d+)<\/td>/gc) {
|
|
- $FolderHrefs[$NrFolders] = $1;
|
|
- $FolderNames[$NrFolders] = HtmlUnescape($2);
|
|
- $FolderNrMessages[$NrFolders] = $3;
|
|
+ m/<td class=\"d?ManageFoldersFolderNameCol\"><a\s*(class=\"PrimaryTextColor\"\s*)?href=\"([^\"]*)\"\s*>(.*?)<\/a>\s*<\/td>\s*(<td\s*class=\"ManageFoldersBufferCol\">[^<]*<\/td>\s*)?<td class=\"d?ManageFoldersTotalCountCol[^\"]*\">(\d+)<\/td>/gc) {
|
|
+ $FolderHrefs[$NrFolders] = $2;
|
|
+ $FolderNames[$NrFolders] = HtmlUnescape($3);
|
|
+ $FolderNrMessages[$NrFolders] = $5;
|
|
if ( $FolderHrefs[$NrFolders] !~ m/FolderID=([^&]*)/ ) {
|
|
die "Could not detect FolderId.";
|
|
}
|
|
@@ -1257,6 +1343,9 @@ GetFolders();
|
|
|
|
for (my $FolderIdx=0;$FolderIdx<$NrFolders;$FolderIdx++) {
|
|
next if (scalar keys %FoldersToProcess && not exists $FoldersToProcess{lc $FolderNames[$FolderIdx]});
|
|
+ next if ( ($SkipTrash =~ m/^Yes$/i) && ($FolderIds[$FolderIdx] eq $TrashFolderId) );
|
|
+ # JDLA hack: the Drafts folder cannot be processed (it does not work on the real site either). Assumes the folder with id 000-...-4 is Drafts.
|
|
+ next if ( ($Mode =~ m/(200810|200902)/) && ($FolderIds[$FolderIdx] =~ m/^(0|-)*4$/));
|
|
Display("\nProcessing folder $FolderNames[$FolderIdx].\n",MinVerbosity => 1);
|
|
GetMessagesFromFolder($FolderIdx);
|
|
Display("$NrMessagesDetected/$NrMessagesUnread Messages/Unread.\n",MinVerbosity => 1);
|