从文本中找出形如 Email=xxx@xxx.com 的条目,并列出这些邮箱:
#!/usr/bin/perl
# note: email may be like this: Email=xxx@yyy.com
use strict;
use warnings;
# Matches one whitespace-delimited token of the form Email=<local>@<domain>.
# The match is case-insensitive, as before. Domain labels may be a single
# character and the top-level domain may be up to 63 letters long, so
# addresses such as a@x.museum are no longer dropped.
my $EMAIL_TOKEN_RE = qr{
    \A Email=
    [-\w.]+ \@
    (?: [a-z0-9] [a-z0-9-]* \. )+
    [a-z]{2,63}
    \z
}xi;

# extract_emails($line)
#   $line   - one line of input text
#   returns - the list of "Email=..." tokens found in $line, in input order
#             (an empty list when there are none); the "Email=" prefix is
#             kept in every returned token
sub extract_emails {
    my ($line) = @_;
    return grep { $_ =~ $EMAIL_TOKEN_RE } split /\s+/, $line;
}

# Start at 0 so the counter is defined even when nothing matches.
my $email_count = 0;

while ( my $line = <> ) {    # read from the files named in @ARGV, or STDIN
    for my $email ( extract_emails($line) ) {
        print "$email\n";
        $email_count++;
    }
}

# Summary left disabled so that standard output holds nothing but the
# extracted tokens, one per line.
#print "Emails Extracted: $email_count\n";
----------------------------------- 1.sh file -----------------------------
#!/bin/bash
# Split an input file into the lines that mention a duplicated email and the
# lines that do not.
#
# Usage: 1.sh INPUT_FILE
#
# ./extract.pl is run on INPUT_FILE and its output (one extracted email token
# per line) is reduced to the tokens that occur more than once. Every line of
# INPUT_FILE containing one of those tokens is written to ./dupfile; all
# other lines are written to ./newfile.

if [ "$#" -lt 1 ]; then
    echo "usage: $0 INPUT_FILE" >&2
    exit 2
fi
input=$1

# Private temp file for the duplicate list, so that no fixed file name in the
# current directory gets overwritten; it is removed on every exit path.
dup_list=$(mktemp) || exit 1
trap 'rm -f "$dup_list"' EXIT

# uniq -d needs sorted input and prints one copy of each repeated line.
./extract.pl "$input" | sort | uniq -d > "$dup_list"

# -F treats each token as a literal string, so the dots in an address are not
# regex wildcards. With -f, an empty list matches nothing, so when there are
# no duplicates dupfile is empty and newfile is a full copy of the input.
# grep exits with 1 when it selects no lines, which is not an error here.
grep -v -F -f "$dup_list" -- "$input" > newfile || [ "$?" -eq 1 ] || exit 1
grep -F -f "$dup_list" -- "$input" > dupfile || [ "$?" -eq 1 ] || exit 1