Mercurial Hosting > nabble
diff src/nabble/model/MailingLists.java @ 0:7ecd1a4ef557
add content
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 21 Mar 2019 19:15:52 -0600 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/nabble/model/MailingLists.java Thu Mar 21 19:15:52 2019 -0600 @@ -0,0 +1,1170 @@ +package nabble.model; + +import fschmidt.db.DbDatabase; +import fschmidt.db.DbNull; +import fschmidt.util.java.DateUtils; +import fschmidt.util.java.HtmlUtils; +import fschmidt.util.mail.Mail; +import fschmidt.util.mail.MailAddress; +import fschmidt.util.mail.MailAddressException; +import fschmidt.util.mail.MailEncodingException; +import fschmidt.util.mail.MailException; +import fschmidt.util.mail.MailHome; +import fschmidt.util.mail.MailIterator; +import fschmidt.util.mail.MailParseException; +import fschmidt.util.mail.Pop3Server; +import fschmidt.util.mail.javamail.MstorInServer; +import nabble.model.lucene.HitCollector; +import nabble.model.lucene.LuceneSearcher; +import org.apache.lucene.document.Document; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.Version; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.UnsupportedEncodingException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.TimeZone; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.mail.internet.InternetAddress; +import javax.mail.internet.AddressException; + + +final class MailingLists { + private static final Logger logger = LoggerFactory.getLogger(MailingLists.class); + + private static final float nameChangeFreq = Init.get("mlNameChangeFreq",0.1f); + static final Pop3Server pop3Server = (Pop3Server)Init.get("mailingListArchivePop3Server"); + + private MailingLists() {} // never + + static { + if( Init.hasDaemons ) { + runMailingLists(); + } + } + + private static void runMailingLists() { + if( pop3Server == null ) { + logger.warn("no pop3 server defined, mailing lists not running"); + return; + } + Executors.scheduleWithFixedDelay(new Runnable() { + public void run(){ + try { + processMail(); + processFwds(); + } catch(MailException e) { + logger.error("mailing list processing",e); + } + } + }, 10, 10, TimeUnit.SECONDS ); + logger.info("mailing lists enabled"); + } + + private static void processMail() { + MailIterator mails = pop3Server.getMail(); + int count = 0; + try { + while( mails.hasNext() ) { + Mail mail = mails.next(); + try { + makePost(mail); + count++; + } catch (MailAddressException e) { + logger.warn("mail:\n"+mail.getRawInput(),e); // screwed-up mail + } catch (Exception e) { + logger.error("mail:\n"+mail.getRawInput(),e); + } + } + } finally { + mails.close(); + if( count > 0 ) + logger.error("Processed " + count + " emails."); + } + } + + static MailingList.ImportResult importMbox(File file,MailingListImpl ml,String mailErrorsToS,int maxErrors) + throws ModelException + { + final DateFormat mailmanDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy"); + final DateFormat mailDateFormat = new javax.mail.internet.MailDateFormat(); + mailmanDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); + mailDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); + MailAddress mailErrorsTo = new MailAddress(mailErrorsToS); + MstorInServer server = new MstorInServer(file); + server.setMetaEnabled(false); + MailIterator mails = server.getMail(); + try { + int imported = 0; + int errors = 0; + while( mails.hasNext() ) { + Mail mail = mails.next(); + try { + try { + mail.getFrom(); + } catch (MailAddressException e) { + String[] from = mail.getHeader("From"); + if (from == null || from.length == 0) + throw new MailAddressException("'From' not found in the header", e); + mail.setHeader("From", from[0].replace(" at ", "@")); + } + Date sentDate = mail.getSentDate(); + if ((sentDate==null || sentDate.getTime() < 0) && mail.getHeader("Date")!=null) { + String dateH = mail.getHeader("Date")[0]; + if (dateH!=null) { + try { + sentDate = mailmanDateFormat.parse(dateH); + } catch (java.text.ParseException e) {} + if (sentDate!=null) + mail.setSentDate(sentDate); + } + } + if ((sentDate==null || sentDate.getTime() < 0) && mail.getHeader("Resent-date")!=null) { + String dateH = mail.getHeader("Resent-date")[0]; + if (dateH!=null) { + try { + sentDate = mailDateFormat.parse(dateH); + } catch (java.text.ParseException e) {} + if (sentDate!=null) + mail.setSentDate(sentDate); + } + } + if ((sentDate==null || sentDate.getTime() < 0)) { + String rawInput = mail.getRawInput(); + try { + String dateH = rawInput.substring(rawInput.indexOf(' ',5), rawInput.indexOf('\n')).trim(); + sentDate = mailmanDateFormat.parse(dateH); + } catch (Exception e) { + logger.error("",e); // what kind of exception is ok? + } + if (sentDate!=null) + mail.setSentDate(sentDate); + } + makeForumPost(mail,ml,true); + imported++; + } catch (Exception e) { + sendErrorMail(mail, e, mailErrorsTo); + errors++; + if( errors >= maxErrors ) + throw ModelException.newInstance("import_mbox_errors",""+errors+" errors reached after importing "+imported+" messages"); + } + } + final int imported2 = imported; + final int errors2 = errors; + return new MailingList.ImportResult() { + public int getImported() { return imported2; } + public int getErrors() { return errors2; } + }; + } finally { + mails.close(); + } + } + + private static void makePost(Mail mail) + throws ModelException + { + MailingListImpl ml = getMailingList(mail); + if (ml == null) { + logger.info("Mailing list not found for: " + Arrays.asList(mail.getTo())); + return; + } + if (checkForward(mail, ml)) { + return; + } + if (checkPending(mail, ml)) { + return; + } + makeForumPost(mail, ml, false); + } + + private static void makeForumPost(Mail mail, MailingListImpl ml, boolean isImport) + throws ModelException + { + String messageID = getMessageID(mail, msgFmt); + mail.setMessageID(messageID); + + String message = mail.getRawInput(); + message = message.replace("\000",""); // postgres can't handle 0 + if( !msgFmt.isOk(message) ) + return; + String text = msgFmt.getMailText(message,null); + NodeImpl forum = ml.getForumImpl(); + + if( doNotArchive(text) || (doNotArchive(mail) && !ml.ignoreNoArchive()) ) { + logger.info("XNoArchive in "+forum.getSubject()); + return; + } + + DbDatabase db; + try { + db = forum.siteKey.getDb(); + } catch(UpdatingException e) { + return; // hack for schema migration + } + db.beginTransaction(); + try { + forum = (NodeImpl)forum.getGoodCopy(); + MailingListImpl mailingList = forum.getMailingListImpl(); + + { + NodeImpl post = forum.getNodeImplFromMessageID(messageID); + if( post != null) { + if(isImport) + return; + throw new RuntimeException("MessageID "+messageID+" already in db for forum "+forum.getId()); + } + } + + UserImpl user = getUser(mail, mailingList); + if (user.isNoArchive()) + return; + + String subject = mailingList.fixSubject(mail.getSubject()); + if( subject==null || subject.trim().equals("") ) + subject = "(no subject)"; + + if (!isImport) { + ListServer oldListServer = mailingList.getListServer(); + if (oldListServer==ListServer.unknown || oldListServer instanceof ListServer.Mailman) { + ListServer listServer = detectListServer(mail); + if (listServer!=null && listServer!=oldListServer && (oldListServer==ListServer.unknown || listServer==ListServer.mailman21)) { + mailingList.setListServer(listServer); + mailingList.update(); + } + } + } + + Date now = new Date(); + Date date = mail.getSentDate(); + if( date==null || date.compareTo(now) > 0 || date.getTime() < 0) + date = now; + + boolean isGuessedParent = false; + String parentID = getParentID(mail, messageID); + NodeImpl parent = forum.getNodeImplFromMessageID(parentID); + if ( parent!=null && threadBySubject(forum, subject, parent.getSubject()) ) { + parent = null; + } + + NodeImpl[] orphans = NodeImpl.getFromParentID(messageID,mailingList); + if ( parent==null ) { + try { + parent = guessParent(mail, date, mailingList, subject, orphans); + if ( parent != null ) + isGuessedParent = true; + } catch(IOException e) { + logger.error("guessParent failed",e); + } + } + + NodeImpl post = NodeImpl.newChildNode(Node.Kind.POST,user,subject,message,msgFmt,parent==null?forum:parent); + if( parent==null && parentID != null ) { + logger.debug("Orphan "+messageID+" starting new thread "); + isGuessedParent = true; + } + + post.setWhenCreated(date); + post.setMessageID(messageID); + if (isGuessedParent) { + post.setGuessedParent(parentID); + } else if (parent==null) { + // for root posts which do not have parentID set guess flag to uncertain + post.setGuessedParent((Boolean) null); + } + post.insert(false); + if( isGuessedParent && parentID==null ) + logger.debug("no parentID for "+post); + + for (NodeImpl orphan : orphans) { + try { + if (!threadBySubject(forum, subject, orphan.getSubject())) { + orphan.changeParentImpl(post); + } + } catch (ModelException.NodeLoop e) { + logger.error("", e); // should not happen now... + orphan.getDbRecord().fields().put("parent_message_id", DbNull.STRING); + orphan.getDbRecord().update(); + } + } + + db.commitTransaction(); + } finally { + db.endTransaction(); + } + } + + private static boolean threadBySubject(Node forum, String subject, String parentSubject) { + if (!forumsThreadedBySubject.contains(forum.getId())) return false; + return ! normalizeSubject(subject).equals(normalizeSubject(parentSubject)); + } + + static final Set<Long> forumsThreadedBySubject = new HashSet<Long>(Arrays.asList((Long[])Init.get("forumsThreadedBySubject", new Long[0]))); + + + private static void sendErrorMail(Mail mail, Exception e, MailAddress mailTo) { + if( e instanceof UnsupportedEncodingException + || e instanceof MailAddressException + || e instanceof MailEncodingException + || e instanceof MailParseException + ) { + logger.info(e.toString()); + } else { + logger.error("",e); + } + StringWriter sb = new StringWriter(); + PrintWriter out = new PrintWriter(sb); + e.printStackTrace( out ); + out.close(); + String msg = e.getMessage(); + if (msg!=null && msg.indexOf('\n')>=0) msg = msg.substring(0, msg.indexOf('\n')).trim(); + String subject = "error: "+msg; + MailSubsystem.sendErrorMail(mail, mailTo, subject, sb.toString()); + } + + private static String getParentID(Mail mail, String messageID) { + String[] inReplyTos = mail.getHeader("In-Reply-To"); + if( inReplyTos == null ) { + inReplyTos = mail.getHeader("In-Reply-to"); + if( inReplyTos != null ) + logger.error("does this happen - case sensitive"); + } + if( inReplyTos != null ) { + for (String inReplyTo : inReplyTos) { + for( String s : MailSubsystem.stripMultiBrackets(inReplyTo) ) { + if (!s.equals(messageID) && !s.equals("")) return s; + } + } + } + try { + String[] refs = mail.getHeader("References"); + if( refs != null ) { + for (String ref : refs) { + List<String> list = MailSubsystem.stripMultiBrackets(ref); + if( list.isEmpty() ) + continue; + String s = list.get(list.size()-1); + if (!s.equals(messageID) && !s.equals("")) return s; + } + } + } catch(MailParseException e) { + logger.warn("screwed up References for messageID="+messageID,e); + } + return null; + } + + private static NodeImpl guessParent(Mail mail, Date date, MailingListImpl mailingList, String subject, NodeImpl[] orphans) throws IOException { + Set<NodeImpl> offspring = new HashSet<NodeImpl>(); + for (NodeImpl orphan : orphans) { + for (NodeImpl n : orphan.getDescendantImpls()) { + offspring.add(n); + } + } + return guessParent(mail, date, mailingList, subject, offspring); + } + + static NodeImpl guessParent(NodeImpl post, Collection<NodeImpl> ignore) { + Mail mail = MailHome.newMail(post.getMessage().getRaw()); + NodeImpl forum = post.getAppImpl(); + if (forum == null) { + return null; // detached post + } + MailingListImpl mailingList = forum.getAssociatedMailingListImpl(); + if (mailingList == null) { + return null; // forum no longer a mailing list + } + try { + Set<NodeImpl> ignoreSet = new HashSet<NodeImpl>(); + for( NodeImpl n : post.getDescendantImpls() ) { + ignoreSet.add(n); + } + if (ignore != null) { + ignoreSet.addAll(ignore); + } + return guessParent(mail, post.getWhenCreated(), mailingList, post.getSubject(), ignoreSet); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static NodeImpl guessParent(Mail mail, Date date, MailingListImpl mailingList, String subject, Set<NodeImpl> offspring) + throws IOException { + // attach to ancestor if any + NodeImpl forum = mailingList.getForumImpl(); + try { + String[] refs = mail.getHeader("References"); + if( refs != null ) { + for( String ref : refs ) { + final List<String> list = MailSubsystem.stripMultiBrackets(ref); + for( int i=list.size()-1; i>=0; i-- ) { + String ancestorID = list.get(i); + NodeImpl parent = forum.getNodeImplFromMessageID(ancestorID); + if (parent!=null && !offspring.contains(parent) && !threadBySubject(forum,subject,parent.getSubject())) { + logger.debug("Attaching orphan "+mail.getMessageID()+" to grandparent "+parent); + return parent; + } + } + } + } + } catch(MailParseException e) { + logger.warn("screwed up References",e); + } + // handle lost In-Reply-To headers + // heuristics - use Thread-Topic header to find matching subjects in last 3 days + String[] threadTopics = mail.getHeader("Thread-Topic"); + String threadTopic = threadTopics==null?null:mailingList.fixSubject(threadTopics[0]); + long forumId = forum.getId(); + Filter filter = Lucene.getRangeFilter(DateUtils.addDays(date, -7), date); + SiteImpl site = forum.getSiteImpl(); + LuceneSearcher searcher = Lucene.newSearcher(site); + try { + if( threadTopic != null ) { + threadTopic = threadTopic.toLowerCase(); + if (threadTopic.startsWith("re: ")) + threadTopic = threadTopic.substring(4); + threadTopic = threadTopic.trim(); + if (!threadTopic.equals("")) { + NodeImpl parent = (NodeImpl)getPriorPost(site,searcher, forumId, threadTopic, filter, date, offspring); + if( parent!=null && !offspring.contains(parent) && !threadBySubject(forum,subject,parent.getSubject())) return parent; + } + } + // if no thread-topic, but subject starts with Re:, try with subject + subject = subject.toLowerCase(); + if( subject.startsWith("re: ") ) { + subject = subject.substring(4).trim(); + if ( !subject.equals(threadTopic) && !"".equals(subject) ) { + NodeImpl parent = (NodeImpl)getPriorPost(site,searcher, forumId, subject, filter, date, offspring); + if( parent!=null && !offspring.contains(parent)) return parent; + } + } + } finally { + searcher.close(); + } + return null; + } + + private static boolean checkPending(Mail mail, MailingListImpl ml) { + if (checkPending(getMessageID(mail, msgFmt), ml)) { + return true; + } + String[] xMessageId = mail.getHeader("X-Message-Id"); + if (xMessageId != null && xMessageId.length > 0 && checkPending(MailSubsystem.stripBrackets(xMessageId[0]), ml)) { + return true; + } + xMessageId = mail.getHeader("X-Original-Message-Id"); + if (xMessageId != null && xMessageId.length > 0 && checkPending(MailSubsystem.stripBrackets(xMessageId[0]), ml)) { + return true; + } + return false; + } + + private static boolean checkPending(String messageID, MailingListImpl ml) { + NodeImpl pendingPost = ml.getForumImpl().getNodeImplFromMessageID(messageID); + if( pendingPost==null ) + return false; + Node.MailToList mail = pendingPost.getMailToList(); + if( mail == null ) { + logger.warn("MessageID "+messageID+" already in db as "+pendingPost+" for forum "+ml.getId()); + } else if( !mail.isPending() ) { + logger.error("post not pending "+pendingPost); + } else { + mail.clearPending(); + } + return true; + } + + private static boolean doNotArchive(Mail mail) { + String[] xNoArchive = mail.getHeader("X-No-Archive"); + if (xNoArchive != null && xNoArchive.length > 0 && "yes".equalsIgnoreCase(xNoArchive[0])) { + return true; + } + String[] xArchive = mail.getHeader("X-Archive"); + if (xArchive != null && xArchive.length > 0 && xArchive[0] != null && (xArchive[0].startsWith("expiry") || "no".equalsIgnoreCase(xArchive[0]))) { + return true; + } + String[] archive = mail.getHeader("Archive"); + if (archive != null && archive.length > 0 && "no".equalsIgnoreCase(archive[0])) { + return true; + } + return false; + } + + private static final Pattern xNoArchivePtn = Pattern.compile("(?im)\\AX-No-Archive: yes *$"); + private static boolean doNotArchive(String text) { + return xNoArchivePtn.matcher(text).find(); + } + + private static MailingListImpl getMailingList(Mail mail) { + MailingListImpl ml = null; + String[] a = mail.getHeader("Envelope-To"); + if (a == null) + a = mail.getHeader("X-Delivered-to"); // fastmail + if (a == null) + a = mail.getHeader("X-Original-To"); // postfix + if( a.length > 1 ) + a = new String[] { a[0] }; + for( String address : a[0].split(",") ) { + address = address.trim(); + MailingListImpl candidate = MailingListImpl.getMailingListByEnvelopeAddress(address); + if (candidate == null) { + // escaped list mail, bounce mail + String returnPath = MailSubsystem.getReturnPath(mail); + if( returnPath.equals(address) ) + continue; // ignore spam + MailSubsystem.bounce(mail, + "Delivery to the following recipient failed permanently:\n\n " + + address + + "\n\nNo archive exists for this address.\n" + ); + logger.warn( "no mailing list found for "+address+" - bouncing mail to "+returnPath + ":\n" + mail); + } else { + if( ml != null ) + logger.error("mailing list already set"); + ml = candidate; + } + } + return ml; + } + + private static String extractDomain(String email) { + String domain = email.substring(email.indexOf('@')+1).toLowerCase(); + // hack to unify google messages + return domain.replace("google.com","googlegroups.com"); + } + + private static boolean checkForward(Mail mail, MailingListImpl ml) { + // check if the archive guessed from subscription address is not presented in the + // common headers that contain list address, forward to the archive owner in this case + String envTo[] = mail.getHeader("Envelope-To"); + if (envTo == null) + envTo = mail.getHeader("X-Delivered-to"); // fastmail + if (envTo == null) + envTo = mail.getHeader("X-Original-To"); // postfix + String originalTo = envTo[0]; + { + MailAddress[] to = mail.getTo(); + if( to==null || to.length!=1 || !to[0].getAddrSpec().equalsIgnoreCase(originalTo) ) + return false; + } + // check for domain of the message's From: or Reply-To: + String listAddress = ml.getListAddress(); + String domain = extractDomain(listAddress); + String maintenanceMessageReplyTo = null; + { + MailAddress[] replyTos = mail.getReplyTo(); + if (replyTos != null) { + for (MailAddress replyTo : replyTos) { + String replyDomain = extractDomain(replyTo.getAddrSpec()); + if (replyDomain.endsWith(domain) || domain.endsWith(replyDomain)) { + maintenanceMessageReplyTo = replyTo.getAddrSpec(); + break; + } + } + } + } + MailAddress from = mail.getFrom(); + // first we compare the domains + if( maintenanceMessageReplyTo == null && from != null && (extractDomain(from.getAddrSpec()).endsWith(domain) || domain.endsWith(extractDomain(from.getAddrSpec())))) + maintenanceMessageReplyTo = from.getAddrSpec(); + // check if this is a majordomo email + if (maintenanceMessageReplyTo == null && from != null && from.getAddrSpec().toLowerCase().startsWith("majordomo@")) + maintenanceMessageReplyTo = from.getAddrSpec(); + + if( maintenanceMessageReplyTo != null ) { + mail.setReplyTo( new MailAddress(fwdEmail(originalTo, maintenanceMessageReplyTo) )); + MailAddress ownerAddress = getArchiveOwnerAddress(ml); + MailHome.getDefaultSmtpServer().send(mail,ownerAddress); + logger.info("Forwarding maintenance message to owner: " + ownerAddress); + logger.info(mail.getRawInput()); + return true; + } + if( MailSubsystem.getReturnPath(mail).equals("") ) { + MailAddress ownerAddress = getArchiveOwnerAddress(ml); + MailHome.getDefaultSmtpServer().send(mail,ownerAddress); + logger.info("Forwarding maintenance message to owner: " + ownerAddress); + logger.info(mail.getRawInput()); + return true; + } + logger.info("Bouncing email to: " + MailSubsystem.getReturnPath(mail) + " / envelopeTo = " + originalTo + "\n" + mail.getRawInput()); + MailSubsystem.bounce(mail, + "Delivery to the following recipient failed permanently:\n\n " + + originalTo + + "\n\nThis email address is only for archiving mailing lists and should not be used directly.\n" + ); + return true; + } + + private static MailAddress getArchiveOwnerAddress(MailingListImpl mailingList) { + // If this list was exported to another server, we have to send this email + // to the person that did the export. Otherwise we send to the current owner. + String exportOwner = mailingList.getExportOwner(); + if (exportOwner == null) { + // Send to the current owner... + User owner = mailingList.getForumImpl().getOwnerImpl(); + return new MailAddress(owner.getEmail(), owner.getName()); + } else { + // Send to the person who exported the archive... + return new MailAddress(exportOwner); + } + } + + private static MailAddress toMailAddress(String s) { + try { + InternetAddress ia = new InternetAddress(s); + return new MailAddress(ia.getAddress(),ia.getPersonal()); + } catch(AddressException e) { + return null; + } + } + + private static UserImpl getUser(Mail mail, MailingListImpl mailingList) { + MailAddress addr = null; + String a[] = mail.getHeader("X-Original-From"); + if( a != null ) + addr = toMailAddress(a[0]); + if( addr == null ) + addr = mail.getFrom(); + String email = addr.getAddrSpec(); + if (email == null || "".equals(email.trim())) + { + throw new MailAddressException("Invalid sender address: "+addr); + } + SiteImpl site = mailingList.getForumImpl().getSiteImpl(); + UserImpl user = site.getUserImplFromEmail(email); + if( user==null || !user.isRegistered() ) { + String username; + if( email.equalsIgnoreCase(mailingList.getListAddress()) ) { + username = mailingList.getForum().getSubject() + " mailing list"; + } else { + username = addr.getDisplayName(); + if( username == null || "".equals(username.trim()) ) { + username = email.indexOf('@')>0 ? email.substring(0, email.indexOf('@')) : email; + } + } + if( username.endsWith(" (JIRA)") ) { + username = "JIRA "+email; + } + if( user==null ) { + user = UserImpl.createGhost(site,email); + user.setNameLike(username,false); + user.insert(); + } else { + String oldName = user.getName(); + if( !oldName.toLowerCase().startsWith(username.toLowerCase()) + && (Math.random() < nameChangeFreq) + ) { + user.setNameLike(username,false); + user.getDbRecord().update(); + logger.warn("changed name of "+user+" from '"+oldName+"' to '"+user.getName()+"'"); + } + } + } + return user; + } + + static final MailMessageFormat msgFmt = new MailMessageFormat('m', "mail"); + + private static Node getPriorPost(final SiteImpl site,final LuceneSearcher searcher, long forumId, final String subject, Filter filter, final Date to, final Set offspring) throws IOException { + //String phrase = "\""+QueryParser.escape(subject.replace('\"',' '))+"\""; + try { + NodeSearcher.Builder query = new NodeSearcher.Builder(site,forumId); + query.addNodeKind(Node.Kind.POST); + QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,Lucene.SUBJECT_FLD, Lucene.analyzer); + parser.setDefaultOperator(QueryParser.AND_OPERATOR); + Query subjectQuery = parser.parse(QueryParser.escape(subject.replace('\"',' ').replace("&&"," "))); + if (! (subjectQuery instanceof BooleanQuery && ((BooleanQuery)subjectQuery).getClauses().length==0) ) + query.addQuery(subjectQuery); + final Node[] resultHolder = new Node[1]; + searcher.search( query.build().getQuery(), filter, new HitCollector() { + protected void process(Document doc) { + NodeImpl post = Lucene.node(site,doc); + if (post==null) + return; + String parentSubject = post.getSubject().toLowerCase(); + if ( (parentSubject.equals(subject) || (parentSubject.startsWith("re: ") && parentSubject.substring(4).trim().equals(subject))) + && to.after(post.getWhenCreated()) + && (resultHolder[0]==null || resultHolder[0].getWhenCreated().before(post.getWhenCreated())) + && !offspring.contains(post) + ) + resultHolder[0] = post; + } + }); + Node result = resultHolder[0]; + + if (result != null) { + // find the uppermost post with almost-the-same subject + String subjectEtalon = normalizeSubject(subject.toLowerCase()); + Node resultCandidate = result.getTopic(); + while (result != null) { + String resultSubject = normalizeSubject(result.getSubject().toLowerCase()); + if (!resultSubject.equals(subjectEtalon)) break; // break when subject really changes + + // this post has almost-the-same subject + if (!offspring.contains(result)) { + // set only if this node is not presented in escape-set + resultCandidate = result; + } + result = result.getParent(); + } + result = resultCandidate; + } + return result; + } catch (ParseException e) { + throw new RuntimeException(e); + } + } + + private static final Pattern bracketRegex = Pattern.compile("\\[[^\\[]+\\]"); + + static Pattern prefixRegex(String prefixes) { + return Pattern.compile( "^((" + prefixes + "): *)+" ); + } + + private static final Pattern defaultPrefixRegex = prefixRegex("re|aw|res|fwd|答复"); + + /** + * Remove from subject all possible prefixes which are added while forwarding, replying, etc... + * + * @param subject original subject + * @return normalized subject + */ + private static String normalizeSubject(String subject) { + return normalizeSubject(subject,defaultPrefixRegex); + } + + static String normalizeSubject(String subject,Pattern prefixRegex) { + if (subject != null) { + subject = subject.toLowerCase().trim(); + subject = bracketRegex.matcher(subject).replaceAll(""); + subject = prefixRegex.matcher(subject).replaceAll(""); + } + return subject; + } + + static void nop() {} + + + private static ListServer detectListServer(Mail mail) { + String[] mailman = mail.getHeader("X-Mailman-Version"); + if (mailman!=null && mailman.length==1 && mailman[0]!=null) { + if (mailman[0].startsWith("2.0")) { + return ListServer.mailman20; + } else if (mailman[0].startsWith("2.1")) { + return ListServer.mailman21; + } else if (mailman[0].startsWith("2.")) { + logger.error("unknown mailman version: "+mailman[0]+" in message "+mail.getMessageID()); + } + return null; + } + + String[] mList = mail.getHeader("Mailing-List"); + if (mList!=null && mList.length==1 && mList[0]!=null) { + if (mList[0].indexOf("run by ezmlm")>=0) { + return ListServer.ezmlm; + } else if (mList[0].indexOf("@yahoogr")>0 || mList[0].indexOf("@gruposyahoo")>0) { + return ListServer.yahoo; + } else if (mList[0].indexOf("@googlegroups")>0) { + return ListServer.google; + } + } + + String[] listproc = mail.getHeader("X-Listprocessor-Version"); + if (listproc!=null && listproc.length==1 && listproc[0]!=null) { + if (listproc[0].indexOf("ListProc")>=0) { + if (listproc[0].indexOf("CREN")>=0) { + return ListServer.listproc; + } else { + return ListServer.oldlistproc; + } + } else { + logger.error("unknown listproc version: "+listproc[0]+" in message "+mail.getMessageID()); + return null; + } + } + + String[] ecartis = mail.getHeader("X-ecartis-version"); + if (ecartis!=null && ecartis.length==1 && ecartis[0]!=null) { + if (ecartis[0].indexOf("Ecartis")>=0) { + return ListServer.ecartis; + } else { + logger.error("unknown ecartis version: "+ecartis[0]+" in message "+mail.getMessageID()); + return null; + } + } + + String[] lyris = mail.getHeader("X-LISTMANAGER-Message-Id"); + if (lyris!=null && lyris.length==1 && lyris[0]!=null) { + if (lyris[0].indexOf("LISTMANAGER")>=0) { + return ListServer.lyris; + } else { + logger.error("unexpected x-listmanager-message-id header: "+lyris[0]+" in message "+mail.getMessageID()); + return null; + } + } + + String[] xListServer = mail.getHeader("X-ListServer"); + if (xListServer!=null && xListServer.length==1 && xListServer[0]!=null) { + if (xListServer[0].indexOf("CommuniGate")>=0) { + return ListServer.communigate; + } else { + logger.error("unknown x-listserver header: "+xListServer[0]+" in message "+mail.getMessageID()); + return null; + } + } + + // may not be reliable + String[] listSubscribe = mail.getHeader("List-Subscribe"); + if (listSubscribe!=null && listSubscribe.length==1 && listSubscribe[0]!=null) { + if (listSubscribe[0].indexOf("+subscribe@")>0) { + return ListServer.mlmmj; + } else if (listSubscribe[0].indexOf("listserver@")>=0) { + return ListServer.listserver; + } else if (listSubscribe[0].indexOf("sympa@")>=0) { + return ListServer.sympa; + } + } + + // not possible to detect: + // listserv + // majordomo / majordomo2 + // smartlist + + return null; + } +/* + static void redoGuessedParents() {} + + public static void rethreadPosts(boolean inBatch) throws SQLException{ + rethreadPosts(1, 0, inBatch); + } + + public static void rethreadPosts(long startingPostId, boolean inBatch) throws SQLException{ + rethreadPosts(startingPostId, 0, inBatch); + } + + static void rethreadPosts(long startingPostId, long forumId, boolean inBatch) throws SQLException{ + Logger batchLog = inBatch ? Batch.logger : logger; + if(startingPostId > 1) + batchLog.info("Starting rethread from post " + startingPostId); + if(forumId > 0) + batchLog.info("Rethreading for forum " + forumId); + + //the WHERE condition is post_id > postId ! + long postId = startingPostId - 1; + int processed_post_count = 0; + int modified_post_count = 0; + int null_parent_count = 0; + NodeImpl post = null; + boolean more = true; + try { + outer: while (more) { + Connection con = Db.db.getConnection(); + try { + con.setAutoCommit(false); + PreparedStatement stmt = con.prepareStatement( + (forumId > 0) ? + "SELECT * FROM descendants(" + forumId + ") WHERE node_id > ? AND " + + "(guessed_parent='t' OR guessed_parent is null) AND " + + "msg_fmt='m'" + + "ORDER BY node_id LIMIT 1" + : + "SELECT * FROM node WHERE node_id > ? AND " + + "(guessed_parent='t' OR guessed_parent is null) AND " + + "msg_fmt='m'" + + "ORDER BY node_id LIMIT 1" + ); + stmt.setLong(1, postId); + ResultSet rs = stmt.executeQuery(); + more = false; + while (rs.next()) { + more = true; + post = NodeImpl.getNode(rs); + try { + postId = post.getId(); + Mail mail = MailHome.newMail(post.getMessage().getRaw()); + MailingListImpl mailingList = post.getAppImpl().getAssociatedMailingListImpl(); + if (mailingList==null) continue; // forum no longer a mailing list + List<NodeImpl> descendants = new ArrayList<NodeImpl>(); + for( NodeImpl n : post.getDescendantImpls() ) { + descendants.add(n); + } + NodeImpl parent = guessParent(mail, post.getWhenCreated(), mailingList, post.getSubject(), descendants.toArray(new NodeImpl[0])); + + if (parent != null && parent.getId() != post.getParentId()) { + try { + batchLog.debug("setting parent of " + post + " to " + parent); + post.setGuessedParent(parent); + if(++ modified_post_count % 100 == 0) + batchLog.info("Modified " + modified_post_count + " posts"); + } catch (ModelException.NodeLoop e) { + batchLog.error("",e); + } + } else if(parent == null && post.getParentId() != 0 && post.getParent().getKind()!=Node.Kind.APP){ + batchLog.info("Null parent at " + post); + null_parent_count ++; + } + } catch(Exception x){ + batchLog.error("Exception at post " + post, x); + break outer; + } + + if(++ processed_post_count % 3000 == 0) + batchLog.info("Processed " + processed_post_count + " posts, current postId: "+postId); + + if (inBatch) { + Batch.checkStopped(); + } + } + stmt.close(); + con.commit(); + } finally { + con.close(); + } + } + } finally { + batchLog.info("Exited at post " + post); + batchLog.info("Processed " + processed_post_count + " posts"); + batchLog.info("Modified " + modified_post_count + " posts"); + batchLog.info("Guessed null parent at " + null_parent_count + " posts"); + } + } +*/ + private static void getRethreadIds(Connection con,long parentId,Collection<Long> ids) + throws SQLException + { + PreparedStatement stmt = con.prepareStatement( + "select node_id, guessed_parent, msg_fmt from node where parent_id = ?" + ); + stmt.setLong(1,parentId); + ResultSet rs = stmt.executeQuery(); + while( rs.next() ) { + long id = rs.getLong("node_id"); + if( "m".equals(rs.getString("msg_fmt")) + && ( rs.getBoolean("guessed_parent") || rs.wasNull() ) + ) + ids.add(id); + getRethreadIds(con,id,ids); + } + rs.close(); + stmt.close(); + } + + static void rethreadForum(NodeImpl forum, boolean inBatch) throws SQLException{ + long forumId = forum.getId(); + long rethreadStart = System.currentTimeMillis(); + Logger batchLog = inBatch ? Batch.logger : logger; + batchLog.info("Rethreading for forum " + forumId); + SiteKey siteKey = forum.getSiteImpl().siteKey; + DbDatabase db = siteKey.getDb(); + + Collection<Long> ids = new ArrayList<Long>(); + { + Connection con = db.getConnection(); + long queryStart = System.currentTimeMillis(); + getRethreadIds(con,forumId,ids); + batchLog.info("Query took " + (System.currentTimeMillis() - queryStart) + " ms"); + con.close(); + } + + batchLog.info(ids.size() + " posts to process..."); + + //the WHERE condition is post_id > postId ! + int processed_post_count = 0; + int modified_post_count = 0; + int null_parent_count = 0; + + try { + while (ids.size() > 0) { + Connection con = db.getConnection(); + try { + con.setAutoCommit(false); + + long id = ids.iterator().next(); + ids.remove(id); + NodeImpl post = NodeImpl.getNode(siteKey,id); + try { + Mail mail = MailHome.newMail(post.getMessage().getRaw()); + MailingListImpl mailingList = post.getAppImpl().getAssociatedMailingListImpl(); + if (mailingList==null) continue; // forum no longer a mailing list + List<NodeImpl> descendants = new ArrayList<NodeImpl>(); + for( NodeImpl n : post.getDescendantImpls() ) { + descendants.add(n); + } + NodeImpl parent = guessParent(mail, post.getWhenCreated(), mailingList, post.getSubject(), descendants.toArray(new NodeImpl[0])); + + if (parent != null && parent.getId() != post.getParentId()) { + try { + batchLog.info("setting parent of " + post + " to " + parent); + post.setGuessedParent(parent); + + if(++ modified_post_count % 1000 == 0) + batchLog.info("Modified " + modified_post_count + " posts"); + } catch (ModelException.NodeLoop e) { + batchLog.error("",e); + } + } else if(parent == null && post.getParentId() != 0 && post.getParent().getKind()!=Node.Kind.APP){ + batchLog.info("Null parent at " + post); + null_parent_count ++; + } + } catch(Exception x){ + batchLog.error("Exception at " + post + " - message:\n"+post.getMessage().getRaw(), x); + break; + } + + if(++ processed_post_count % 1000 == 0) + batchLog.info("Processed " + processed_post_count + " posts, current postId: "+id); + + if (inBatch) { + Batch.checkStopped(); + } + con.commit(); + } finally { + con.close(); + } + } + } finally { + batchLog.info("Rethread took " + (System.currentTimeMillis() - rethreadStart) + " ms"); + batchLog.info("Processed " + processed_post_count + " posts"); + batchLog.info("Modified " + modified_post_count + " posts"); + batchLog.info("Guessed null parent at " + null_parent_count + " posts"); + } + } + + + /** + * Get or create message id from an email + * + * @param mail email message + * @param msgFmt message format to use + * @return message id, never null + */ + private static String getMessageID(Mail mail, Message.Format msgFmt) { + String[] messageIds = mail.getHeader("Message-Id"); // returns both Id and ID + if (messageIds == null || messageIds.length == 0 || messageIds[messageIds.length - 1] == null) { + return calcMessageID(mail, msgFmt); + } else { + return MailSubsystem.stripBrackets(messageIds[messageIds.length - 1]); + } + } + + /** + * Create a new message if for an email message + * + * @param mail mail message to process + * @param msgFmt message format to use + * @return a new message id, never null + */ + private static String calcMessageID(Mail mail, Message.Format msgFmt) { + StringBuilder msgId = new StringBuilder(); + msgId.append("MissingID."); + String text = msgFmt.getText(mail.getRawInput(),null); + msgId.append(Integer.toHexString(text.hashCode())); + MailAddress from = mail.getFrom(); + if (from != null) msgId.append(Integer.toHexString(from.toString().hashCode())); + MailAddress[] to = mail.getTo(); + if (to != null && to.length > 0) msgId.append(Integer.toHexString(to[0].toString().hashCode())); + Date date = mail.getSentDate(); + if (date != null) msgId.append(Integer.toHexString(date.hashCode())); + String subject = mail.getSubject(); + if (subject != null) msgId.append(Integer.toHexString(subject.hashCode())); + msgId.append("@nabble.com"); + return msgId.toString(); + } + + + + + + private static final Pop3Server fwdPop3Server = (Pop3Server)Init.get("fwdPop3Server"); + + private static class Lazy { + static final String emailPrefix; + static final String emailSuffix; + static final Pattern pattern; + static { + String addrSpec = fwdPop3Server.getUsername(); + int ind = addrSpec.indexOf('@'); + emailPrefix = addrSpec.substring(0, ind) + "+"; + emailSuffix = addrSpec.substring(ind); + pattern = Pattern.compile( + Pattern.quote(emailPrefix) + "([^@]+)\\+([^@]+)\\+([^@]+)" + Pattern.quote(emailSuffix) + , Pattern.CASE_INSENSITIVE + ); + } + } + + private static void processFwds() { + if( fwdPop3Server == null ) { + logger.error("fwdPop3Server not defined"); + System.exit(-1); + } + MailIterator mails = fwdPop3Server.getMail(); + try { + while( mails.hasNext() ) { + Mail mail = mails.next(); + try { + fwdMail(mail); + } catch (Exception e) { + logger.error("mail:\n"+mail.getRawInput(),e); + } + } + } finally { + mails.close(); + } + } + + private static void fwdMail(Mail mail) { + String[] envTo = mail.getHeader("Envelope-To"); + if (envTo == null) + envTo = mail.getHeader("X-Delivered-to"); // fastmail + if (envTo == null) + envTo = mail.getHeader("X-Original-To"); // postfix + String originalTo = envTo[0]; + Matcher matcher = Lazy.pattern.matcher(originalTo); + if( !matcher.matches() ) + throw new RuntimeException("invalid email: "+originalTo); + String fwdFrom = emailDecode(matcher.group(1)); + String fwdTo = emailDecode(matcher.group(2)); + if( (fwdFrom+fwdTo).hashCode() != Integer.parseInt(matcher.group(3)) ) + throw new RuntimeException("invalid hash: "+originalTo); + mail.setFrom(new MailAddress(fwdFrom)); + mail.setTo(new MailAddress(fwdTo)); + logger.info("Forwarding email to mailing list: " + fwdTo); + MailHome.getDefaultSmtpServer().send(mail); + } + + private static String fwdEmail(String from,String to) { + return Lazy.emailPrefix + emailEncode(from) + '+' + emailEncode(to) + '+' + (from+to).hashCode() + Lazy.emailSuffix; + } + + private static String emailEncode(String s) { + return HtmlUtils.urlEncode(s).replace('%','~'); + } + + private static String emailDecode(String s) { + return HtmlUtils.urlDecode(s.replace('~','%')); + } + +}