shell技巧

email 邮箱重复删除

从包含 Email=xxx@xxx.com 的文本中提取邮箱列表(脚本保存为 extract.pl,供下面的 1.sh 调用):

#!/usr/bin/perl
# Extract e-mail tokens of the form "Email=xxx@yyy.com" from the files named
# on the command line (or from STDIN when none are given) and print every
# matching token, "Email=" prefix included, on its own line.
use strict;
use warnings;

# A whitespace-delimited token must match this pattern in full.
# Differences from a naive pattern, on purpose:
#   * a domain label may be a single character (t.co, x.org);
#   * the top-level domain has no upper length limit (.online, .museum).
my $email_re = qr{
    \A
    Email=
    [-\w.]+                      # local part
    \@
    (?: [a-z0-9] [a-z0-9-]* \. )+  # one or more dot-terminated domain labels
    [a-z]{2,}                    # top-level domain
    \z
}xi;

my $email_count = 0;

while (my $line = <>) {    # read from the named files, or STDIN
    # split ' ' also discards leading whitespace, so no empty first token.
    for my $token (split ' ', $line) {
        next unless $token =~ $email_re;
        print "$token\n";
        $email_count++;
    }
}
# Kept disabled: STDOUT of this script is consumed as a plain token list.
#print "Emails Extracted: $email_count\n";

----------------------------------- 1.sh file -----------------------------
#!/bin/bash
# Split the input file ($1) according to duplicated "Email=..." tokens:
#   newfile - lines that contain no duplicated email token
#   dupfile - lines that contain at least one duplicated email token
# Relies on ./extract.pl, which prints one "Email=..." token per line.

if [ $# -lt 1 ] || [ ! -r "$1" ]; then
  echo "usage: $0 <input-file>" >&2
  exit 1
fi

# Private temp files, so files named tmp1/tmp2 in the current directory are
# never overwritten; they are removed on every exit path.
tmp1=$(mktemp) || exit 1
tmp2=$(mktemp) || exit 1
trap 'rm -f "$tmp1" "$tmp2"' EXIT

# tmp1: every email token found in the input
./extract.pl "$1" > "$tmp1" || exit 1
# tmp2: tokens that occur more than once
sort "$tmp1" | uniq -d > "$tmp2"

# Always produce both output files, even when one of them ends up empty.
: > newfile
: > dupfile

# Compare whole whitespace-delimited tokens against the duplicate list.
# The tokens are never used as regular expressions, so '.' in an address is
# literal, a longer address that merely contains a duplicate one is not hit,
# and an empty duplicate list selects nothing for dupfile.
DUP_LIST="$tmp2" awk -F '[ \t\r\f\v]+' '
  BEGIN {
    list = ENVIRON["DUP_LIST"]
    while ((getline tok < list) > 0)
      dup[tok] = 1
    close(list)
  }
  {
    hit = 0
    for (i = 1; i <= NF; i++) {
      if ($i in dup) { hit = 1; break }
    }
    if (hit)
      print > "dupfile"
    else
      print > "newfile"
  }
' < "$1"