comparison src/nabble/model/MailingLists.java @ 0:7ecd1a4ef557

add content
author Franklin Schmidt <fschmidt@gmail.com>
date Thu, 21 Mar 2019 19:15:52 -0600
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7ecd1a4ef557
1 package nabble.model;
2
3 import fschmidt.db.DbDatabase;
4 import fschmidt.db.DbNull;
5 import fschmidt.util.java.DateUtils;
6 import fschmidt.util.java.HtmlUtils;
7 import fschmidt.util.mail.Mail;
8 import fschmidt.util.mail.MailAddress;
9 import fschmidt.util.mail.MailAddressException;
10 import fschmidt.util.mail.MailEncodingException;
11 import fschmidt.util.mail.MailException;
12 import fschmidt.util.mail.MailHome;
13 import fschmidt.util.mail.MailIterator;
14 import fschmidt.util.mail.MailParseException;
15 import fschmidt.util.mail.Pop3Server;
16 import fschmidt.util.mail.javamail.MstorInServer;
17 import nabble.model.lucene.HitCollector;
18 import nabble.model.lucene.LuceneSearcher;
19 import org.apache.lucene.document.Document;
20 import org.apache.lucene.queryParser.ParseException;
21 import org.apache.lucene.queryParser.QueryParser;
22 import org.apache.lucene.search.BooleanQuery;
23 import org.apache.lucene.search.Filter;
24 import org.apache.lucene.search.Query;
25 import org.apache.lucene.util.Version;
26 import org.slf4j.Logger;
27 import org.slf4j.LoggerFactory;
28
29 import java.io.File;
30 import java.io.IOException;
31 import java.io.PrintWriter;
32 import java.io.StringWriter;
33 import java.io.UnsupportedEncodingException;
34 import java.sql.Connection;
35 import java.sql.PreparedStatement;
36 import java.sql.ResultSet;
37 import java.sql.SQLException;
38 import java.text.DateFormat;
39 import java.text.SimpleDateFormat;
40 import java.util.ArrayList;
41 import java.util.Arrays;
42 import java.util.Collection;
43 import java.util.Date;
44 import java.util.HashSet;
45 import java.util.List;
46 import java.util.Set;
47 import java.util.TimeZone;
48 import java.util.concurrent.TimeUnit;
49 import java.util.regex.Matcher;
50 import java.util.regex.Pattern;
51 import javax.mail.internet.InternetAddress;
52 import javax.mail.internet.AddressException;
53
54
55 final class MailingLists {
/** Logger shared by every static method of this class. */
private static final Logger logger = LoggerFactory.getLogger(MailingLists.class);

/**
 * Value of the "mlNameChangeFreq" init setting (0.1 when unset). getUser()
 * compares it against Math.random() before renaming an unregistered user.
 */
private static final float nameChangeFreq = Init.get("mlNameChangeFreq",0.1f);

/**
 * POP3 account read by processMail(), taken from the
 * "mailingListArchivePop3Server" init setting; runMailingLists() treats
 * null as "mailing lists disabled".
 */
static final Pop3Server pop3Server = (Pop3Server)Init.get("mailingListArchivePop3Server");

/** Static-only class; never instantiated. */
private MailingLists() {}
62
// Runs once at class initialization: the polling job is only started when
// Init.hasDaemons is set.
static {
    if( Init.hasDaemons )
        runMailingLists();
}
68
69 private static void runMailingLists() {
70 if( pop3Server == null ) {
71 logger.warn("no pop3 server defined, mailing lists not running");
72 return;
73 }
74 Executors.scheduleWithFixedDelay(new Runnable() {
75 public void run(){
76 try {
77 processMail();
78 processFwds();
79 } catch(MailException e) {
80 logger.error("mailing list processing",e);
81 }
82 }
83 }, 10, 10, TimeUnit.SECONDS );
84 logger.info("mailing lists enabled");
85 }
86
87 private static void processMail() {
88 MailIterator mails = pop3Server.getMail();
89 int count = 0;
90 try {
91 while( mails.hasNext() ) {
92 Mail mail = mails.next();
93 try {
94 makePost(mail);
95 count++;
96 } catch (MailAddressException e) {
97 logger.warn("mail:\n"+mail.getRawInput(),e); // screwed-up mail
98 } catch (Exception e) {
99 logger.error("mail:\n"+mail.getRawInput(),e);
100 }
101 }
102 } finally {
103 mails.close();
104 if( count > 0 )
105 logger.error("Processed " + count + " emails.");
106 }
107 }
108
109 static MailingList.ImportResult importMbox(File file,MailingListImpl ml,String mailErrorsToS,int maxErrors)
110 throws ModelException
111 {
112 final DateFormat mailmanDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy");
113 final DateFormat mailDateFormat = new javax.mail.internet.MailDateFormat();
114 mailmanDateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
115 mailDateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
116 MailAddress mailErrorsTo = new MailAddress(mailErrorsToS);
117 MstorInServer server = new MstorInServer(file);
118 server.setMetaEnabled(false);
119 MailIterator mails = server.getMail();
120 try {
121 int imported = 0;
122 int errors = 0;
123 while( mails.hasNext() ) {
124 Mail mail = mails.next();
125 try {
126 try {
127 mail.getFrom();
128 } catch (MailAddressException e) {
129 String[] from = mail.getHeader("From");
130 if (from == null || from.length == 0)
131 throw new MailAddressException("'From' not found in the header", e);
132 mail.setHeader("From", from[0].replace(" at ", "@"));
133 }
134 Date sentDate = mail.getSentDate();
135 if ((sentDate==null || sentDate.getTime() < 0) && mail.getHeader("Date")!=null) {
136 String dateH = mail.getHeader("Date")[0];
137 if (dateH!=null) {
138 try {
139 sentDate = mailmanDateFormat.parse(dateH);
140 } catch (java.text.ParseException e) {}
141 if (sentDate!=null)
142 mail.setSentDate(sentDate);
143 }
144 }
145 if ((sentDate==null || sentDate.getTime() < 0) && mail.getHeader("Resent-date")!=null) {
146 String dateH = mail.getHeader("Resent-date")[0];
147 if (dateH!=null) {
148 try {
149 sentDate = mailDateFormat.parse(dateH);
150 } catch (java.text.ParseException e) {}
151 if (sentDate!=null)
152 mail.setSentDate(sentDate);
153 }
154 }
155 if ((sentDate==null || sentDate.getTime() < 0)) {
156 String rawInput = mail.getRawInput();
157 try {
158 String dateH = rawInput.substring(rawInput.indexOf(' ',5), rawInput.indexOf('\n')).trim();
159 sentDate = mailmanDateFormat.parse(dateH);
160 } catch (Exception e) {
161 logger.error("",e); // what kind of exception is ok?
162 }
163 if (sentDate!=null)
164 mail.setSentDate(sentDate);
165 }
166 makeForumPost(mail,ml,true);
167 imported++;
168 } catch (Exception e) {
169 sendErrorMail(mail, e, mailErrorsTo);
170 errors++;
171 if( errors >= maxErrors )
172 throw ModelException.newInstance("import_mbox_errors",""+errors+" errors reached after importing "+imported+" messages");
173 }
174 }
175 final int imported2 = imported;
176 final int errors2 = errors;
177 return new MailingList.ImportResult() {
178 public int getImported() { return imported2; }
179 public int getErrors() { return errors2; }
180 };
181 } finally {
182 mails.close();
183 }
184 }
185
186 private static void makePost(Mail mail)
187 throws ModelException
188 {
189 MailingListImpl ml = getMailingList(mail);
190 if (ml == null) {
191 logger.info("Mailing list not found for: " + Arrays.asList(mail.getTo()));
192 return;
193 }
194 if (checkForward(mail, ml)) {
195 return;
196 }
197 if (checkPending(mail, ml)) {
198 return;
199 }
200 makeForumPost(mail, ml, false);
201 }
202
203 private static void makeForumPost(Mail mail, MailingListImpl ml, boolean isImport)
204 throws ModelException
205 {
206 String messageID = getMessageID(mail, msgFmt);
207 mail.setMessageID(messageID);
208
209 String message = mail.getRawInput();
210 message = message.replace("\000",""); // postgres can't handle 0
211 if( !msgFmt.isOk(message) )
212 return;
213 String text = msgFmt.getMailText(message,null);
214 NodeImpl forum = ml.getForumImpl();
215
216 if( doNotArchive(text) || (doNotArchive(mail) && !ml.ignoreNoArchive()) ) {
217 logger.info("XNoArchive in "+forum.getSubject());
218 return;
219 }
220
221 DbDatabase db;
222 try {
223 db = forum.siteKey.getDb();
224 } catch(UpdatingException e) {
225 return; // hack for schema migration
226 }
227 db.beginTransaction();
228 try {
229 forum = (NodeImpl)forum.getGoodCopy();
230 MailingListImpl mailingList = forum.getMailingListImpl();
231
232 {
233 NodeImpl post = forum.getNodeImplFromMessageID(messageID);
234 if( post != null) {
235 if(isImport)
236 return;
237 throw new RuntimeException("MessageID "+messageID+" already in db for forum "+forum.getId());
238 }
239 }
240
241 UserImpl user = getUser(mail, mailingList);
242 if (user.isNoArchive())
243 return;
244
245 String subject = mailingList.fixSubject(mail.getSubject());
246 if( subject==null || subject.trim().equals("") )
247 subject = "(no subject)";
248
249 if (!isImport) {
250 ListServer oldListServer = mailingList.getListServer();
251 if (oldListServer==ListServer.unknown || oldListServer instanceof ListServer.Mailman) {
252 ListServer listServer = detectListServer(mail);
253 if (listServer!=null && listServer!=oldListServer && (oldListServer==ListServer.unknown || listServer==ListServer.mailman21)) {
254 mailingList.setListServer(listServer);
255 mailingList.update();
256 }
257 }
258 }
259
260 Date now = new Date();
261 Date date = mail.getSentDate();
262 if( date==null || date.compareTo(now) > 0 || date.getTime() < 0)
263 date = now;
264
265 boolean isGuessedParent = false;
266 String parentID = getParentID(mail, messageID);
267 NodeImpl parent = forum.getNodeImplFromMessageID(parentID);
268 if ( parent!=null && threadBySubject(forum, subject, parent.getSubject()) ) {
269 parent = null;
270 }
271
272 NodeImpl[] orphans = NodeImpl.getFromParentID(messageID,mailingList);
273 if ( parent==null ) {
274 try {
275 parent = guessParent(mail, date, mailingList, subject, orphans);
276 if ( parent != null )
277 isGuessedParent = true;
278 } catch(IOException e) {
279 logger.error("guessParent failed",e);
280 }
281 }
282
283 NodeImpl post = NodeImpl.newChildNode(Node.Kind.POST,user,subject,message,msgFmt,parent==null?forum:parent);
284 if( parent==null && parentID != null ) {
285 logger.debug("Orphan "+messageID+" starting new thread ");
286 isGuessedParent = true;
287 }
288
289 post.setWhenCreated(date);
290 post.setMessageID(messageID);
291 if (isGuessedParent) {
292 post.setGuessedParent(parentID);
293 } else if (parent==null) {
294 // for root posts which do not have parentID set guess flag to uncertain
295 post.setGuessedParent((Boolean) null);
296 }
297 post.insert(false);
298 if( isGuessedParent && parentID==null )
299 logger.debug("no parentID for "+post);
300
301 for (NodeImpl orphan : orphans) {
302 try {
303 if (!threadBySubject(forum, subject, orphan.getSubject())) {
304 orphan.changeParentImpl(post);
305 }
306 } catch (ModelException.NodeLoop e) {
307 logger.error("", e); // should not happen now...
308 orphan.getDbRecord().fields().put("parent_message_id", DbNull.STRING);
309 orphan.getDbRecord().update();
310 }
311 }
312
313 db.commitTransaction();
314 } finally {
315 db.endTransaction();
316 }
317 }
318
319 private static boolean threadBySubject(Node forum, String subject, String parentSubject) {
320 if (!forumsThreadedBySubject.contains(forum.getId())) return false;
321 return ! normalizeSubject(subject).equals(normalizeSubject(parentSubject));
322 }
323
324 static final Set<Long> forumsThreadedBySubject = new HashSet<Long>(Arrays.asList((Long[])Init.get("forumsThreadedBySubject", new Long[0])));
325
326
327 private static void sendErrorMail(Mail mail, Exception e, MailAddress mailTo) {
328 if( e instanceof UnsupportedEncodingException
329 || e instanceof MailAddressException
330 || e instanceof MailEncodingException
331 || e instanceof MailParseException
332 ) {
333 logger.info(e.toString());
334 } else {
335 logger.error("",e);
336 }
337 StringWriter sb = new StringWriter();
338 PrintWriter out = new PrintWriter(sb);
339 e.printStackTrace( out );
340 out.close();
341 String msg = e.getMessage();
342 if (msg!=null && msg.indexOf('\n')>=0) msg = msg.substring(0, msg.indexOf('\n')).trim();
343 String subject = "error: "+msg;
344 MailSubsystem.sendErrorMail(mail, mailTo, subject, sb.toString());
345 }
346
347 private static String getParentID(Mail mail, String messageID) {
348 String[] inReplyTos = mail.getHeader("In-Reply-To");
349 if( inReplyTos == null ) {
350 inReplyTos = mail.getHeader("In-Reply-to");
351 if( inReplyTos != null )
352 logger.error("does this happen - case sensitive");
353 }
354 if( inReplyTos != null ) {
355 for (String inReplyTo : inReplyTos) {
356 for( String s : MailSubsystem.stripMultiBrackets(inReplyTo) ) {
357 if (!s.equals(messageID) && !s.equals("")) return s;
358 }
359 }
360 }
361 try {
362 String[] refs = mail.getHeader("References");
363 if( refs != null ) {
364 for (String ref : refs) {
365 List<String> list = MailSubsystem.stripMultiBrackets(ref);
366 if( list.isEmpty() )
367 continue;
368 String s = list.get(list.size()-1);
369 if (!s.equals(messageID) && !s.equals("")) return s;
370 }
371 }
372 } catch(MailParseException e) {
373 logger.warn("screwed up References for messageID="+messageID,e);
374 }
375 return null;
376 }
377
378 private static NodeImpl guessParent(Mail mail, Date date, MailingListImpl mailingList, String subject, NodeImpl[] orphans) throws IOException {
379 Set<NodeImpl> offspring = new HashSet<NodeImpl>();
380 for (NodeImpl orphan : orphans) {
381 for (NodeImpl n : orphan.getDescendantImpls()) {
382 offspring.add(n);
383 }
384 }
385 return guessParent(mail, date, mailingList, subject, offspring);
386 }
387
388 static NodeImpl guessParent(NodeImpl post, Collection<NodeImpl> ignore) {
389 Mail mail = MailHome.newMail(post.getMessage().getRaw());
390 NodeImpl forum = post.getAppImpl();
391 if (forum == null) {
392 return null; // detached post
393 }
394 MailingListImpl mailingList = forum.getAssociatedMailingListImpl();
395 if (mailingList == null) {
396 return null; // forum no longer a mailing list
397 }
398 try {
399 Set<NodeImpl> ignoreSet = new HashSet<NodeImpl>();
400 for( NodeImpl n : post.getDescendantImpls() ) {
401 ignoreSet.add(n);
402 }
403 if (ignore != null) {
404 ignoreSet.addAll(ignore);
405 }
406 return guessParent(mail, post.getWhenCreated(), mailingList, post.getSubject(), ignoreSet);
407 } catch (IOException e) {
408 throw new RuntimeException(e);
409 }
410 }
411
412 private static NodeImpl guessParent(Mail mail, Date date, MailingListImpl mailingList, String subject, Set<NodeImpl> offspring)
413 throws IOException {
414 // attach to ancestor if any
415 NodeImpl forum = mailingList.getForumImpl();
416 try {
417 String[] refs = mail.getHeader("References");
418 if( refs != null ) {
419 for( String ref : refs ) {
420 final List<String> list = MailSubsystem.stripMultiBrackets(ref);
421 for( int i=list.size()-1; i>=0; i-- ) {
422 String ancestorID = list.get(i);
423 NodeImpl parent = forum.getNodeImplFromMessageID(ancestorID);
424 if (parent!=null && !offspring.contains(parent) && !threadBySubject(forum,subject,parent.getSubject())) {
425 logger.debug("Attaching orphan "+mail.getMessageID()+" to grandparent "+parent);
426 return parent;
427 }
428 }
429 }
430 }
431 } catch(MailParseException e) {
432 logger.warn("screwed up References",e);
433 }
434 // handle lost In-Reply-To headers
435 // heuristics - use Thread-Topic header to find matching subjects in last 3 days
436 String[] threadTopics = mail.getHeader("Thread-Topic");
437 String threadTopic = threadTopics==null?null:mailingList.fixSubject(threadTopics[0]);
438 long forumId = forum.getId();
439 Filter filter = Lucene.getRangeFilter(DateUtils.addDays(date, -7), date);
440 SiteImpl site = forum.getSiteImpl();
441 LuceneSearcher searcher = Lucene.newSearcher(site);
442 try {
443 if( threadTopic != null ) {
444 threadTopic = threadTopic.toLowerCase();
445 if (threadTopic.startsWith("re: "))
446 threadTopic = threadTopic.substring(4);
447 threadTopic = threadTopic.trim();
448 if (!threadTopic.equals("")) {
449 NodeImpl parent = (NodeImpl)getPriorPost(site,searcher, forumId, threadTopic, filter, date, offspring);
450 if( parent!=null && !offspring.contains(parent) && !threadBySubject(forum,subject,parent.getSubject())) return parent;
451 }
452 }
453 // if no thread-topic, but subject starts with Re:, try with subject
454 subject = subject.toLowerCase();
455 if( subject.startsWith("re: ") ) {
456 subject = subject.substring(4).trim();
457 if ( !subject.equals(threadTopic) && !"".equals(subject) ) {
458 NodeImpl parent = (NodeImpl)getPriorPost(site,searcher, forumId, subject, filter, date, offspring);
459 if( parent!=null && !offspring.contains(parent)) return parent;
460 }
461 }
462 } finally {
463 searcher.close();
464 }
465 return null;
466 }
467
468 private static boolean checkPending(Mail mail, MailingListImpl ml) {
469 if (checkPending(getMessageID(mail, msgFmt), ml)) {
470 return true;
471 }
472 String[] xMessageId = mail.getHeader("X-Message-Id");
473 if (xMessageId != null && xMessageId.length > 0 && checkPending(MailSubsystem.stripBrackets(xMessageId[0]), ml)) {
474 return true;
475 }
476 xMessageId = mail.getHeader("X-Original-Message-Id");
477 if (xMessageId != null && xMessageId.length > 0 && checkPending(MailSubsystem.stripBrackets(xMessageId[0]), ml)) {
478 return true;
479 }
480 return false;
481 }
482
483 private static boolean checkPending(String messageID, MailingListImpl ml) {
484 NodeImpl pendingPost = ml.getForumImpl().getNodeImplFromMessageID(messageID);
485 if( pendingPost==null )
486 return false;
487 Node.MailToList mail = pendingPost.getMailToList();
488 if( mail == null ) {
489 logger.warn("MessageID "+messageID+" already in db as "+pendingPost+" for forum "+ml.getId());
490 } else if( !mail.isPending() ) {
491 logger.error("post not pending "+pendingPost);
492 } else {
493 mail.clearPending();
494 }
495 return true;
496 }
497
498 private static boolean doNotArchive(Mail mail) {
499 String[] xNoArchive = mail.getHeader("X-No-Archive");
500 if (xNoArchive != null && xNoArchive.length > 0 && "yes".equalsIgnoreCase(xNoArchive[0])) {
501 return true;
502 }
503 String[] xArchive = mail.getHeader("X-Archive");
504 if (xArchive != null && xArchive.length > 0 && xArchive[0] != null && (xArchive[0].startsWith("expiry") || "no".equalsIgnoreCase(xArchive[0]))) {
505 return true;
506 }
507 String[] archive = mail.getHeader("Archive");
508 if (archive != null && archive.length > 0 && "no".equalsIgnoreCase(archive[0])) {
509 return true;
510 }
511 return false;
512 }
513
514 private static final Pattern xNoArchivePtn = Pattern.compile("(?im)\\AX-No-Archive: yes *$");
515 private static boolean doNotArchive(String text) {
516 return xNoArchivePtn.matcher(text).find();
517 }
518
519 private static MailingListImpl getMailingList(Mail mail) {
520 MailingListImpl ml = null;
521 String[] a = mail.getHeader("Envelope-To");
522 if (a == null)
523 a = mail.getHeader("X-Delivered-to"); // fastmail
524 if (a == null)
525 a = mail.getHeader("X-Original-To"); // postfix
526 if( a.length > 1 )
527 a = new String[] { a[0] };
528 for( String address : a[0].split(",") ) {
529 address = address.trim();
530 MailingListImpl candidate = MailingListImpl.getMailingListByEnvelopeAddress(address);
531 if (candidate == null) {
532 // escaped list mail, bounce mail
533 String returnPath = MailSubsystem.getReturnPath(mail);
534 if( returnPath.equals(address) )
535 continue; // ignore spam
536 MailSubsystem.bounce(mail,
537 "Delivery to the following recipient failed permanently:\n\n "
538 + address
539 + "\n\nNo archive exists for this address.\n"
540 );
541 logger.warn( "no mailing list found for "+address+" - bouncing mail to "+returnPath + ":\n" + mail);
542 } else {
543 if( ml != null )
544 logger.error("mailing list already set");
545 ml = candidate;
546 }
547 }
548 return ml;
549 }
550
551 private static String extractDomain(String email) {
552 String domain = email.substring(email.indexOf('@')+1).toLowerCase();
553 // hack to unify google messages
554 return domain.replace("google.com","googlegroups.com");
555 }
556
557 private static boolean checkForward(Mail mail, MailingListImpl ml) {
558 // check if the archive guessed from subscription address is not presented in the
559 // common headers that contain list address, forward to the archive owner in this case
560 String envTo[] = mail.getHeader("Envelope-To");
561 if (envTo == null)
562 envTo = mail.getHeader("X-Delivered-to"); // fastmail
563 if (envTo == null)
564 envTo = mail.getHeader("X-Original-To"); // postfix
565 String originalTo = envTo[0];
566 {
567 MailAddress[] to = mail.getTo();
568 if( to==null || to.length!=1 || !to[0].getAddrSpec().equalsIgnoreCase(originalTo) )
569 return false;
570 }
571 // check for domain of the message's From: or Reply-To:
572 String listAddress = ml.getListAddress();
573 String domain = extractDomain(listAddress);
574 String maintenanceMessageReplyTo = null;
575 {
576 MailAddress[] replyTos = mail.getReplyTo();
577 if (replyTos != null) {
578 for (MailAddress replyTo : replyTos) {
579 String replyDomain = extractDomain(replyTo.getAddrSpec());
580 if (replyDomain.endsWith(domain) || domain.endsWith(replyDomain)) {
581 maintenanceMessageReplyTo = replyTo.getAddrSpec();
582 break;
583 }
584 }
585 }
586 }
587 MailAddress from = mail.getFrom();
588 // first we compare the domains
589 if( maintenanceMessageReplyTo == null && from != null && (extractDomain(from.getAddrSpec()).endsWith(domain) || domain.endsWith(extractDomain(from.getAddrSpec()))))
590 maintenanceMessageReplyTo = from.getAddrSpec();
591 // check if this is a majordomo email
592 if (maintenanceMessageReplyTo == null && from != null && from.getAddrSpec().toLowerCase().startsWith("majordomo@"))
593 maintenanceMessageReplyTo = from.getAddrSpec();
594
595 if( maintenanceMessageReplyTo != null ) {
596 mail.setReplyTo( new MailAddress(fwdEmail(originalTo, maintenanceMessageReplyTo) ));
597 MailAddress ownerAddress = getArchiveOwnerAddress(ml);
598 MailHome.getDefaultSmtpServer().send(mail,ownerAddress);
599 logger.info("Forwarding maintenance message to owner: " + ownerAddress);
600 logger.info(mail.getRawInput());
601 return true;
602 }
603 if( MailSubsystem.getReturnPath(mail).equals("") ) {
604 MailAddress ownerAddress = getArchiveOwnerAddress(ml);
605 MailHome.getDefaultSmtpServer().send(mail,ownerAddress);
606 logger.info("Forwarding maintenance message to owner: " + ownerAddress);
607 logger.info(mail.getRawInput());
608 return true;
609 }
610 logger.info("Bouncing email to: " + MailSubsystem.getReturnPath(mail) + " / envelopeTo = " + originalTo + "\n" + mail.getRawInput());
611 MailSubsystem.bounce(mail,
612 "Delivery to the following recipient failed permanently:\n\n "
613 + originalTo
614 + "\n\nThis email address is only for archiving mailing lists and should not be used directly.\n"
615 );
616 return true;
617 }
618
619 private static MailAddress getArchiveOwnerAddress(MailingListImpl mailingList) {
620 // If this list was exported to another server, we have to send this email
621 // to the person that did the export. Otherwise we send to the current owner.
622 String exportOwner = mailingList.getExportOwner();
623 if (exportOwner == null) {
624 // Send to the current owner...
625 User owner = mailingList.getForumImpl().getOwnerImpl();
626 return new MailAddress(owner.getEmail(), owner.getName());
627 } else {
628 // Send to the person who exported the archive...
629 return new MailAddress(exportOwner);
630 }
631 }
632
633 private static MailAddress toMailAddress(String s) {
634 try {
635 InternetAddress ia = new InternetAddress(s);
636 return new MailAddress(ia.getAddress(),ia.getPersonal());
637 } catch(AddressException e) {
638 return null;
639 }
640 }
641
642 private static UserImpl getUser(Mail mail, MailingListImpl mailingList) {
643 MailAddress addr = null;
644 String a[] = mail.getHeader("X-Original-From");
645 if( a != null )
646 addr = toMailAddress(a[0]);
647 if( addr == null )
648 addr = mail.getFrom();
649 String email = addr.getAddrSpec();
650 if (email == null || "".equals(email.trim()))
651 {
652 throw new MailAddressException("Invalid sender address: "+addr);
653 }
654 SiteImpl site = mailingList.getForumImpl().getSiteImpl();
655 UserImpl user = site.getUserImplFromEmail(email);
656 if( user==null || !user.isRegistered() ) {
657 String username;
658 if( email.equalsIgnoreCase(mailingList.getListAddress()) ) {
659 username = mailingList.getForum().getSubject() + " mailing list";
660 } else {
661 username = addr.getDisplayName();
662 if( username == null || "".equals(username.trim()) ) {
663 username = email.indexOf('@')>0 ? email.substring(0, email.indexOf('@')) : email;
664 }
665 }
666 if( username.endsWith(" (JIRA)") ) {
667 username = "JIRA "+email;
668 }
669 if( user==null ) {
670 user = UserImpl.createGhost(site,email);
671 user.setNameLike(username,false);
672 user.insert();
673 } else {
674 String oldName = user.getName();
675 if( !oldName.toLowerCase().startsWith(username.toLowerCase())
676 && (Math.random() < nameChangeFreq)
677 ) {
678 user.setNameLike(username,false);
679 user.getDbRecord().update();
680 logger.warn("changed name of "+user+" from '"+oldName+"' to '"+user.getName()+"'");
681 }
682 }
683 }
684 return user;
685 }
686
687 static final MailMessageFormat msgFmt = new MailMessageFormat('m', "mail");
688
689 private static Node getPriorPost(final SiteImpl site,final LuceneSearcher searcher, long forumId, final String subject, Filter filter, final Date to, final Set offspring) throws IOException {
690 //String phrase = "\""+QueryParser.escape(subject.replace('\"',' '))+"\"";
691 try {
692 NodeSearcher.Builder query = new NodeSearcher.Builder(site,forumId);
693 query.addNodeKind(Node.Kind.POST);
694 QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,Lucene.SUBJECT_FLD, Lucene.analyzer);
695 parser.setDefaultOperator(QueryParser.AND_OPERATOR);
696 Query subjectQuery = parser.parse(QueryParser.escape(subject.replace('\"',' ').replace("&&"," ")));
697 if (! (subjectQuery instanceof BooleanQuery && ((BooleanQuery)subjectQuery).getClauses().length==0) )
698 query.addQuery(subjectQuery);
699 final Node[] resultHolder = new Node[1];
700 searcher.search( query.build().getQuery(), filter, new HitCollector() {
701 protected void process(Document doc) {
702 NodeImpl post = Lucene.node(site,doc);
703 if (post==null)
704 return;
705 String parentSubject = post.getSubject().toLowerCase();
706 if ( (parentSubject.equals(subject) || (parentSubject.startsWith("re: ") && parentSubject.substring(4).trim().equals(subject)))
707 && to.after(post.getWhenCreated())
708 && (resultHolder[0]==null || resultHolder[0].getWhenCreated().before(post.getWhenCreated()))
709 && !offspring.contains(post)
710 )
711 resultHolder[0] = post;
712 }
713 });
714 Node result = resultHolder[0];
715
716 if (result != null) {
717 // find the uppermost post with almost-the-same subject
718 String subjectEtalon = normalizeSubject(subject.toLowerCase());
719 Node resultCandidate = result.getTopic();
720 while (result != null) {
721 String resultSubject = normalizeSubject(result.getSubject().toLowerCase());
722 if (!resultSubject.equals(subjectEtalon)) break; // break when subject really changes
723
724 // this post has almost-the-same subject
725 if (!offspring.contains(result)) {
726 // set only if this node is not presented in escape-set
727 resultCandidate = result;
728 }
729 result = result.getParent();
730 }
731 result = resultCandidate;
732 }
733 return result;
734 } catch (ParseException e) {
735 throw new RuntimeException(e);
736 }
737 }
738
739 private static final Pattern bracketRegex = Pattern.compile("\\[[^\\[]+\\]");
740
741 static Pattern prefixRegex(String prefixes) {
742 return Pattern.compile( "^((" + prefixes + "): *)+" );
743 }
744
745 private static final Pattern defaultPrefixRegex = prefixRegex("re|aw|res|fwd|答复");
746
747 /**
748 * Remove from subject all possible prefixes which are added while forwarding, replying, etc...
749 *
750 * @param subject original subject
751 * @return normalized subject
752 */
753 private static String normalizeSubject(String subject) {
754 return normalizeSubject(subject,defaultPrefixRegex);
755 }
756
757 static String normalizeSubject(String subject,Pattern prefixRegex) {
758 if (subject != null) {
759 subject = subject.toLowerCase().trim();
760 subject = bracketRegex.matcher(subject).replaceAll("");
761 subject = prefixRegex.matcher(subject).replaceAll("");
762 }
763 return subject;
764 }
765
766 static void nop() {}
767
768
769 private static ListServer detectListServer(Mail mail) {
770 String[] mailman = mail.getHeader("X-Mailman-Version");
771 if (mailman!=null && mailman.length==1 && mailman[0]!=null) {
772 if (mailman[0].startsWith("2.0")) {
773 return ListServer.mailman20;
774 } else if (mailman[0].startsWith("2.1")) {
775 return ListServer.mailman21;
776 } else if (mailman[0].startsWith("2.")) {
777 logger.error("unknown mailman version: "+mailman[0]+" in message "+mail.getMessageID());
778 }
779 return null;
780 }
781
782 String[] mList = mail.getHeader("Mailing-List");
783 if (mList!=null && mList.length==1 && mList[0]!=null) {
784 if (mList[0].indexOf("run by ezmlm")>=0) {
785 return ListServer.ezmlm;
786 } else if (mList[0].indexOf("@yahoogr")>0 || mList[0].indexOf("@gruposyahoo")>0) {
787 return ListServer.yahoo;
788 } else if (mList[0].indexOf("@googlegroups")>0) {
789 return ListServer.google;
790 }
791 }
792
793 String[] listproc = mail.getHeader("X-Listprocessor-Version");
794 if (listproc!=null && listproc.length==1 && listproc[0]!=null) {
795 if (listproc[0].indexOf("ListProc")>=0) {
796 if (listproc[0].indexOf("CREN")>=0) {
797 return ListServer.listproc;
798 } else {
799 return ListServer.oldlistproc;
800 }
801 } else {
802 logger.error("unknown listproc version: "+listproc[0]+" in message "+mail.getMessageID());
803 return null;
804 }
805 }
806
807 String[] ecartis = mail.getHeader("X-ecartis-version");
808 if (ecartis!=null && ecartis.length==1 && ecartis[0]!=null) {
809 if (ecartis[0].indexOf("Ecartis")>=0) {
810 return ListServer.ecartis;
811 } else {
812 logger.error("unknown ecartis version: "+ecartis[0]+" in message "+mail.getMessageID());
813 return null;
814 }
815 }
816
817 String[] lyris = mail.getHeader("X-LISTMANAGER-Message-Id");
818 if (lyris!=null && lyris.length==1 && lyris[0]!=null) {
819 if (lyris[0].indexOf("LISTMANAGER")>=0) {
820 return ListServer.lyris;
821 } else {
822 logger.error("unexpected x-listmanager-message-id header: "+lyris[0]+" in message "+mail.getMessageID());
823 return null;
824 }
825 }
826
827 String[] xListServer = mail.getHeader("X-ListServer");
828 if (xListServer!=null && xListServer.length==1 && xListServer[0]!=null) {
829 if (xListServer[0].indexOf("CommuniGate")>=0) {
830 return ListServer.communigate;
831 } else {
832 logger.error("unknown x-listserver header: "+xListServer[0]+" in message "+mail.getMessageID());
833 return null;
834 }
835 }
836
837 // may not be reliable
838 String[] listSubscribe = mail.getHeader("List-Subscribe");
839 if (listSubscribe!=null && listSubscribe.length==1 && listSubscribe[0]!=null) {
840 if (listSubscribe[0].indexOf("+subscribe@")>0) {
841 return ListServer.mlmmj;
842 } else if (listSubscribe[0].indexOf("listserver@")>=0) {
843 return ListServer.listserver;
844 } else if (listSubscribe[0].indexOf("sympa@")>=0) {
845 return ListServer.sympa;
846 }
847 }
848
849 // not possible to detect:
850 // listserv
851 // majordomo / majordomo2
852 // smartlist
853
854 return null;
855 }
856 /*
857 static void redoGuessedParents() {}
858
859 public static void rethreadPosts(boolean inBatch) throws SQLException{
860 rethreadPosts(1, 0, inBatch);
861 }
862
863 public static void rethreadPosts(long startingPostId, boolean inBatch) throws SQLException{
864 rethreadPosts(startingPostId, 0, inBatch);
865 }
866
867 static void rethreadPosts(long startingPostId, long forumId, boolean inBatch) throws SQLException{
868 Logger batchLog = inBatch ? Batch.logger : logger;
869 if(startingPostId > 1)
870 batchLog.info("Starting rethread from post " + startingPostId);
871 if(forumId > 0)
872 batchLog.info("Rethreading for forum " + forumId);
873
874 //the WHERE condition is post_id > postId !
875 long postId = startingPostId - 1;
876 int processed_post_count = 0;
877 int modified_post_count = 0;
878 int null_parent_count = 0;
879 NodeImpl post = null;
880 boolean more = true;
881 try {
882 outer: while (more) {
883 Connection con = Db.db.getConnection();
884 try {
885 con.setAutoCommit(false);
886 PreparedStatement stmt = con.prepareStatement(
887 (forumId > 0) ?
888 "SELECT * FROM descendants(" + forumId + ") WHERE node_id > ? AND " +
889 "(guessed_parent='t' OR guessed_parent is null) AND " +
890 "msg_fmt='m'" +
891 "ORDER BY node_id LIMIT 1"
892 :
893 "SELECT * FROM node WHERE node_id > ? AND " +
894 "(guessed_parent='t' OR guessed_parent is null) AND " +
895 "msg_fmt='m'" +
896 "ORDER BY node_id LIMIT 1"
897 );
898 stmt.setLong(1, postId);
899 ResultSet rs = stmt.executeQuery();
900 more = false;
901 while (rs.next()) {
902 more = true;
903 post = NodeImpl.getNode(rs);
904 try {
905 postId = post.getId();
906 Mail mail = MailHome.newMail(post.getMessage().getRaw());
907 MailingListImpl mailingList = post.getAppImpl().getAssociatedMailingListImpl();
908 if (mailingList==null) continue; // forum no longer a mailing list
909 List<NodeImpl> descendants = new ArrayList<NodeImpl>();
910 for( NodeImpl n : post.getDescendantImpls() ) {
911 descendants.add(n);
912 }
913 NodeImpl parent = guessParent(mail, post.getWhenCreated(), mailingList, post.getSubject(), descendants.toArray(new NodeImpl[0]));
914
915 if (parent != null && parent.getId() != post.getParentId()) {
916 try {
917 batchLog.debug("setting parent of " + post + " to " + parent);
918 post.setGuessedParent(parent);
919 if(++ modified_post_count % 100 == 0)
920 batchLog.info("Modified " + modified_post_count + " posts");
921 } catch (ModelException.NodeLoop e) {
922 batchLog.error("",e);
923 }
924 } else if(parent == null && post.getParentId() != 0 && post.getParent().getKind()!=Node.Kind.APP){
925 batchLog.info("Null parent at " + post);
926 null_parent_count ++;
927 }
928 } catch(Exception x){
929 batchLog.error("Exception at post " + post, x);
930 break outer;
931 }
932
933 if(++ processed_post_count % 3000 == 0)
934 batchLog.info("Processed " + processed_post_count + " posts, current postId: "+postId);
935
936 if (inBatch) {
937 Batch.checkStopped();
938 }
939 }
940 stmt.close();
941 con.commit();
942 } finally {
943 con.close();
944 }
945 }
946 } finally {
947 batchLog.info("Exited at post " + post);
948 batchLog.info("Processed " + processed_post_count + " posts");
949 batchLog.info("Modified " + modified_post_count + " posts");
950 batchLog.info("Guessed null parent at " + null_parent_count + " posts");
951 }
952 }
953 */
954 private static void getRethreadIds(Connection con,long parentId,Collection<Long> ids)
955 throws SQLException
956 {
957 PreparedStatement stmt = con.prepareStatement(
958 "select node_id, guessed_parent, msg_fmt from node where parent_id = ?"
959 );
960 stmt.setLong(1,parentId);
961 ResultSet rs = stmt.executeQuery();
962 while( rs.next() ) {
963 long id = rs.getLong("node_id");
964 if( "m".equals(rs.getString("msg_fmt"))
965 && ( rs.getBoolean("guessed_parent") || rs.wasNull() )
966 )
967 ids.add(id);
968 getRethreadIds(con,id,ids);
969 }
970 rs.close();
971 stmt.close();
972 }
973
974 static void rethreadForum(NodeImpl forum, boolean inBatch) throws SQLException{
975 long forumId = forum.getId();
976 long rethreadStart = System.currentTimeMillis();
977 Logger batchLog = inBatch ? Batch.logger : logger;
978 batchLog.info("Rethreading for forum " + forumId);
979 SiteKey siteKey = forum.getSiteImpl().siteKey;
980 DbDatabase db = siteKey.getDb();
981
982 Collection<Long> ids = new ArrayList<Long>();
983 {
984 Connection con = db.getConnection();
985 long queryStart = System.currentTimeMillis();
986 getRethreadIds(con,forumId,ids);
987 batchLog.info("Query took " + (System.currentTimeMillis() - queryStart) + " ms");
988 con.close();
989 }
990
991 batchLog.info(ids.size() + " posts to process...");
992
993 //the WHERE condition is post_id > postId !
994 int processed_post_count = 0;
995 int modified_post_count = 0;
996 int null_parent_count = 0;
997
998 try {
999 while (ids.size() > 0) {
1000 Connection con = db.getConnection();
1001 try {
1002 con.setAutoCommit(false);
1003
1004 long id = ids.iterator().next();
1005 ids.remove(id);
1006 NodeImpl post = NodeImpl.getNode(siteKey,id);
1007 try {
1008 Mail mail = MailHome.newMail(post.getMessage().getRaw());
1009 MailingListImpl mailingList = post.getAppImpl().getAssociatedMailingListImpl();
1010 if (mailingList==null) continue; // forum no longer a mailing list
1011 List<NodeImpl> descendants = new ArrayList<NodeImpl>();
1012 for( NodeImpl n : post.getDescendantImpls() ) {
1013 descendants.add(n);
1014 }
1015 NodeImpl parent = guessParent(mail, post.getWhenCreated(), mailingList, post.getSubject(), descendants.toArray(new NodeImpl[0]));
1016
1017 if (parent != null && parent.getId() != post.getParentId()) {
1018 try {
1019 batchLog.info("setting parent of " + post + " to " + parent);
1020 post.setGuessedParent(parent);
1021
1022 if(++ modified_post_count % 1000 == 0)
1023 batchLog.info("Modified " + modified_post_count + " posts");
1024 } catch (ModelException.NodeLoop e) {
1025 batchLog.error("",e);
1026 }
1027 } else if(parent == null && post.getParentId() != 0 && post.getParent().getKind()!=Node.Kind.APP){
1028 batchLog.info("Null parent at " + post);
1029 null_parent_count ++;
1030 }
1031 } catch(Exception x){
1032 batchLog.error("Exception at " + post + " - message:\n"+post.getMessage().getRaw(), x);
1033 break;
1034 }
1035
1036 if(++ processed_post_count % 1000 == 0)
1037 batchLog.info("Processed " + processed_post_count + " posts, current postId: "+id);
1038
1039 if (inBatch) {
1040 Batch.checkStopped();
1041 }
1042 con.commit();
1043 } finally {
1044 con.close();
1045 }
1046 }
1047 } finally {
1048 batchLog.info("Rethread took " + (System.currentTimeMillis() - rethreadStart) + " ms");
1049 batchLog.info("Processed " + processed_post_count + " posts");
1050 batchLog.info("Modified " + modified_post_count + " posts");
1051 batchLog.info("Guessed null parent at " + null_parent_count + " posts");
1052 }
1053 }
1054
1055
1056 /**
1057 * Get or create message id from an email
1058 *
1059 * @param mail email message
1060 * @param msgFmt message format to use
1061 * @return message id, never null
1062 */
1063 private static String getMessageID(Mail mail, Message.Format msgFmt) {
1064 String[] messageIds = mail.getHeader("Message-Id"); // returns both Id and ID
1065 if (messageIds == null || messageIds.length == 0 || messageIds[messageIds.length - 1] == null) {
1066 return calcMessageID(mail, msgFmt);
1067 } else {
1068 return MailSubsystem.stripBrackets(messageIds[messageIds.length - 1]);
1069 }
1070 }
1071
1072 /**
1073 * Create a new message if for an email message
1074 *
1075 * @param mail mail message to process
1076 * @param msgFmt message format to use
1077 * @return a new message id, never null
1078 */
1079 private static String calcMessageID(Mail mail, Message.Format msgFmt) {
1080 StringBuilder msgId = new StringBuilder();
1081 msgId.append("MissingID.");
1082 String text = msgFmt.getText(mail.getRawInput(),null);
1083 msgId.append(Integer.toHexString(text.hashCode()));
1084 MailAddress from = mail.getFrom();
1085 if (from != null) msgId.append(Integer.toHexString(from.toString().hashCode()));
1086 MailAddress[] to = mail.getTo();
1087 if (to != null && to.length > 0) msgId.append(Integer.toHexString(to[0].toString().hashCode()));
1088 Date date = mail.getSentDate();
1089 if (date != null) msgId.append(Integer.toHexString(date.hashCode()));
1090 String subject = mail.getSubject();
1091 if (subject != null) msgId.append(Integer.toHexString(subject.hashCode()));
1092 msgId.append("@nabble.com");
1093 return msgId.toString();
1094 }
1095
1096
1097
1098
1099
1100 private static final Pop3Server fwdPop3Server = (Pop3Server)Init.get("fwdPop3Server");
1101
1102 private static class Lazy {
1103 static final String emailPrefix;
1104 static final String emailSuffix;
1105 static final Pattern pattern;
1106 static {
1107 String addrSpec = fwdPop3Server.getUsername();
1108 int ind = addrSpec.indexOf('@');
1109 emailPrefix = addrSpec.substring(0, ind) + "+";
1110 emailSuffix = addrSpec.substring(ind);
1111 pattern = Pattern.compile(
1112 Pattern.quote(emailPrefix) + "([^@]+)\\+([^@]+)\\+([^@]+)" + Pattern.quote(emailSuffix)
1113 , Pattern.CASE_INSENSITIVE
1114 );
1115 }
1116 }
1117
1118 private static void processFwds() {
1119 if( fwdPop3Server == null ) {
1120 logger.error("fwdPop3Server not defined");
1121 System.exit(-1);
1122 }
1123 MailIterator mails = fwdPop3Server.getMail();
1124 try {
1125 while( mails.hasNext() ) {
1126 Mail mail = mails.next();
1127 try {
1128 fwdMail(mail);
1129 } catch (Exception e) {
1130 logger.error("mail:\n"+mail.getRawInput(),e);
1131 }
1132 }
1133 } finally {
1134 mails.close();
1135 }
1136 }
1137
1138 private static void fwdMail(Mail mail) {
1139 String[] envTo = mail.getHeader("Envelope-To");
1140 if (envTo == null)
1141 envTo = mail.getHeader("X-Delivered-to"); // fastmail
1142 if (envTo == null)
1143 envTo = mail.getHeader("X-Original-To"); // postfix
1144 String originalTo = envTo[0];
1145 Matcher matcher = Lazy.pattern.matcher(originalTo);
1146 if( !matcher.matches() )
1147 throw new RuntimeException("invalid email: "+originalTo);
1148 String fwdFrom = emailDecode(matcher.group(1));
1149 String fwdTo = emailDecode(matcher.group(2));
1150 if( (fwdFrom+fwdTo).hashCode() != Integer.parseInt(matcher.group(3)) )
1151 throw new RuntimeException("invalid hash: "+originalTo);
1152 mail.setFrom(new MailAddress(fwdFrom));
1153 mail.setTo(new MailAddress(fwdTo));
1154 logger.info("Forwarding email to mailing list: " + fwdTo);
1155 MailHome.getDefaultSmtpServer().send(mail);
1156 }
1157
1158 private static String fwdEmail(String from,String to) {
1159 return Lazy.emailPrefix + emailEncode(from) + '+' + emailEncode(to) + '+' + (from+to).hashCode() + Lazy.emailSuffix;
1160 }
1161
1162 private static String emailEncode(String s) {
1163 return HtmlUtils.urlEncode(s).replace('%','~');
1164 }
1165
1166 private static String emailDecode(String s) {
1167 return HtmlUtils.urlDecode(s.replace('~','%'));
1168 }
1169
1170 }