Mercurial Hosting > nabble
view src/nabble/model/MailingLists.java @ 0:7ecd1a4ef557
add content
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 21 Mar 2019 19:15:52 -0600 |
parents | |
children |
line wrap: on
line source
package nabble.model; import fschmidt.db.DbDatabase; import fschmidt.db.DbNull; import fschmidt.util.java.DateUtils; import fschmidt.util.java.HtmlUtils; import fschmidt.util.mail.Mail; import fschmidt.util.mail.MailAddress; import fschmidt.util.mail.MailAddressException; import fschmidt.util.mail.MailEncodingException; import fschmidt.util.mail.MailException; import fschmidt.util.mail.MailHome; import fschmidt.util.mail.MailIterator; import fschmidt.util.mail.MailParseException; import fschmidt.util.mail.Pop3Server; import fschmidt.util.mail.javamail.MstorInServer; import nabble.model.lucene.HitCollector; import nabble.model.lucene.LuceneSearcher; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; import org.apache.lucene.util.Version; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.TimeZone; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.mail.internet.InternetAddress; import javax.mail.internet.AddressException; final class MailingLists { private static final Logger logger = LoggerFactory.getLogger(MailingLists.class); private static final float nameChangeFreq = Init.get("mlNameChangeFreq",0.1f); static final Pop3Server pop3Server = 
(Pop3Server)Init.get("mailingListArchivePop3Server"); private MailingLists() {} // never static { if( Init.hasDaemons ) { runMailingLists(); } } private static void runMailingLists() { if( pop3Server == null ) { logger.warn("no pop3 server defined, mailing lists not running"); return; } Executors.scheduleWithFixedDelay(new Runnable() { public void run(){ try { processMail(); processFwds(); } catch(MailException e) { logger.error("mailing list processing",e); } } }, 10, 10, TimeUnit.SECONDS ); logger.info("mailing lists enabled"); } private static void processMail() { MailIterator mails = pop3Server.getMail(); int count = 0; try { while( mails.hasNext() ) { Mail mail = mails.next(); try { makePost(mail); count++; } catch (MailAddressException e) { logger.warn("mail:\n"+mail.getRawInput(),e); // screwed-up mail } catch (Exception e) { logger.error("mail:\n"+mail.getRawInput(),e); } } } finally { mails.close(); if( count > 0 ) logger.error("Processed " + count + " emails."); } } static MailingList.ImportResult importMbox(File file,MailingListImpl ml,String mailErrorsToS,int maxErrors) throws ModelException { final DateFormat mailmanDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy"); final DateFormat mailDateFormat = new javax.mail.internet.MailDateFormat(); mailmanDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); mailDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); MailAddress mailErrorsTo = new MailAddress(mailErrorsToS); MstorInServer server = new MstorInServer(file); server.setMetaEnabled(false); MailIterator mails = server.getMail(); try { int imported = 0; int errors = 0; while( mails.hasNext() ) { Mail mail = mails.next(); try { try { mail.getFrom(); } catch (MailAddressException e) { String[] from = mail.getHeader("From"); if (from == null || from.length == 0) throw new MailAddressException("'From' not found in the header", e); mail.setHeader("From", from[0].replace(" at ", "@")); } Date sentDate = mail.getSentDate(); if ((sentDate==null || 
sentDate.getTime() < 0) && mail.getHeader("Date")!=null) { String dateH = mail.getHeader("Date")[0]; if (dateH!=null) { try { sentDate = mailmanDateFormat.parse(dateH); } catch (java.text.ParseException e) {} if (sentDate!=null) mail.setSentDate(sentDate); } } if ((sentDate==null || sentDate.getTime() < 0) && mail.getHeader("Resent-date")!=null) { String dateH = mail.getHeader("Resent-date")[0]; if (dateH!=null) { try { sentDate = mailDateFormat.parse(dateH); } catch (java.text.ParseException e) {} if (sentDate!=null) mail.setSentDate(sentDate); } } if ((sentDate==null || sentDate.getTime() < 0)) { String rawInput = mail.getRawInput(); try { String dateH = rawInput.substring(rawInput.indexOf(' ',5), rawInput.indexOf('\n')).trim(); sentDate = mailmanDateFormat.parse(dateH); } catch (Exception e) { logger.error("",e); // what kind of exception is ok? } if (sentDate!=null) mail.setSentDate(sentDate); } makeForumPost(mail,ml,true); imported++; } catch (Exception e) { sendErrorMail(mail, e, mailErrorsTo); errors++; if( errors >= maxErrors ) throw ModelException.newInstance("import_mbox_errors",""+errors+" errors reached after importing "+imported+" messages"); } } final int imported2 = imported; final int errors2 = errors; return new MailingList.ImportResult() { public int getImported() { return imported2; } public int getErrors() { return errors2; } }; } finally { mails.close(); } } private static void makePost(Mail mail) throws ModelException { MailingListImpl ml = getMailingList(mail); if (ml == null) { logger.info("Mailing list not found for: " + Arrays.asList(mail.getTo())); return; } if (checkForward(mail, ml)) { return; } if (checkPending(mail, ml)) { return; } makeForumPost(mail, ml, false); } private static void makeForumPost(Mail mail, MailingListImpl ml, boolean isImport) throws ModelException { String messageID = getMessageID(mail, msgFmt); mail.setMessageID(messageID); String message = mail.getRawInput(); message = message.replace("\000",""); // postgres 
// can't handle 0
		if( !msgFmt.isOk(message) )
			return;
		String text = msgFmt.getMailText(message,null);
		NodeImpl forum = ml.getForumImpl();
		// Honour "do not archive" requests, from the body text always and from the headers
		// unless the list is configured to ignore them.
		if( doNotArchive(text) || (doNotArchive(mail) && !ml.ignoreNoArchive()) ) {
			logger.info("XNoArchive in "+forum.getSubject());
			return;
		}
		DbDatabase db;
		try {
			db = forum.siteKey.getDb();
		} catch(UpdatingException e) {
			return;  // hack for schema migration
		}
		db.beginTransaction();
		try {
			// re-read forum and mailing list inside the transaction
			forum = (NodeImpl)forum.getGoodCopy();
			MailingListImpl mailingList = forum.getMailingListImpl();
			{
				// Duplicate detection by message id: silently skipped on import, an error otherwise.
				NodeImpl post = forum.getNodeImplFromMessageID(messageID);
				if( post != null) {
					if(isImport)
						return;
					throw new RuntimeException("MessageID "+messageID+" already in db for forum "+forum.getId());
				}
			}
			UserImpl user = getUser(mail, mailingList);
			if (user.isNoArchive())
				return;
			String subject = mailingList.fixSubject(mail.getSubject());
			if( subject==null || subject.trim().equals("") )
				subject = "(no subject)";
			if (!isImport) {
				// Live mail only: record the list server software when it is still unknown, or
				// move a Mailman list to mailman21 when that version is detected.
				ListServer oldListServer = mailingList.getListServer();
				if (oldListServer==ListServer.unknown || oldListServer instanceof ListServer.Mailman) {
					ListServer listServer = detectListServer(mail);
					if (listServer!=null && listServer!=oldListServer && (oldListServer==ListServer.unknown || listServer==ListServer.mailman21)) {
						mailingList.setListServer(listServer);
						mailingList.update();
					}
				}
			}
			// Missing, future or negative sent dates are replaced by the current time.
			Date now = new Date();
			Date date = mail.getSentDate();
			if( date==null || date.compareTo(now) > 0 || date.getTime() < 0)
				date = now;
			boolean isGuessedParent = false;
			String parentID = getParentID(mail, messageID);
			NodeImpl parent = forum.getNodeImplFromMessageID(parentID);
			// threadBySubject() == true means the subjects differ in a forum threaded by subject,
			// so the header-declared parent is dropped.
			if ( parent!=null && threadBySubject(forum, subject, parent.getSubject()) ) {
				parent = null;
			}
			// posts already stored that name this message as their parent
			NodeImpl[] orphans = NodeImpl.getFromParentID(messageID,mailingList);
			if ( parent==null ) {
				try {
					parent = guessParent(mail, date, mailingList, subject, orphans);
					if ( parent != null )
						isGuessedParent = true;
				} catch(IOException e) {
					logger.error("guessParent failed",e);
				}
			}
			NodeImpl post =
NodeImpl.newChildNode(Node.Kind.POST,user,subject,message,msgFmt,parent==null?forum:parent);
			// A declared parent that is not (yet) in the db: start a new thread, flagged as guessed.
			if( parent==null && parentID != null ) {
				logger.debug("Orphan "+messageID+" starting new thread ");
				isGuessedParent = true;
			}
			post.setWhenCreated(date);
			post.setMessageID(messageID);
			if (isGuessedParent) {
				post.setGuessedParent(parentID);
			} else if (parent==null) {
				// for root posts which do not have parentID set guess flag to uncertain
				post.setGuessedParent((Boolean) null);
			}
			post.insert(false);
			if( isGuessedParent && parentID==null )
				logger.debug("no parentID for "+post);
			// Adopt the waiting orphans, except those whose subject differs in a
			// subject-threaded forum.
			for (NodeImpl orphan : orphans) {
				try {
					if (!threadBySubject(forum, subject, orphan.getSubject())) {
						orphan.changeParentImpl(post);
					}
				} catch (ModelException.NodeLoop e) {
					logger.error("", e);  // should not happen now...
					// clear the stored parent message id of the orphan that caused the loop
					orphan.getDbRecord().fields().put("parent_message_id", DbNull.STRING);
					orphan.getDbRecord().update();
				}
			}
			db.commitTransaction();
		} finally {
			db.endTransaction();
		}
	}

	/**
	 * Tells whether two posts must be kept in separate threads because of their subjects.
	 * Always false for forums not listed in forumsThreadedBySubject; for listed forums it is
	 * true when the normalized subjects differ.
	 */
	private static boolean threadBySubject(Node forum, String subject, String parentSubject) {
		if (!forumsThreadedBySubject.contains(forum.getId()))
			return false;
		return !
normalizeSubject(subject).equals(normalizeSubject(parentSubject));
	}

	// Ids of the forums configured under "forumsThreadedBySubject" (empty when not configured).
	static final Set<Long> forumsThreadedBySubject = new HashSet<Long>(Arrays.asList((Long[])Init.get("forumsThreadedBySubject", new Long[0])));

	/**
	 * Logs an import failure and reports it by mail to mailTo.  Encoding, address and parse
	 * problems are logged at info level, anything else as an error.  The mail subject carries
	 * the first line of the exception message, the body the stack trace.
	 */
	private static void sendErrorMail(Mail mail, Exception e, MailAddress mailTo) {
		boolean isMailDefect = e instanceof UnsupportedEncodingException
			|| e instanceof MailAddressException
			|| e instanceof MailEncodingException
			|| e instanceof MailParseException;
		if( isMailDefect ) {
			logger.info(e.toString());
		} else {
			logger.error("",e);
		}
		StringWriter trace = new StringWriter();
		PrintWriter out = new PrintWriter(trace);
		e.printStackTrace( out );
		out.close();
		String msg = e.getMessage();
		if (msg!=null) {
			int eol = msg.indexOf('\n');
			if (eol>=0)
				msg = msg.substring(0, eol).trim();
		}
		MailSubsystem.sendErrorMail(mail, mailTo, "error: "+msg, trace.toString());
	}

	/**
	 * Extracts the message id of the parent from the "In-Reply-To" header or, failing that,
	 * from the last id of a "References" header.  Empty ids and the message's own id are skipped.
	 *
	 * @return the parent message id, or null when none is found
	 */
	private static String getParentID(Mail mail, String messageID) {
		String[] inReplyTos = mail.getHeader("In-Reply-To");
		if( inReplyTos == null ) {
			inReplyTos = mail.getHeader("In-Reply-to");
			if( inReplyTos != null )
				logger.error("does this happen - case sensitive");
		}
		if( inReplyTos != null ) {
			for (String header : inReplyTos) {
				for( String id : MailSubsystem.stripMultiBrackets(header) ) {
					if( !(id.equals(messageID) || id.equals("")) )
						return id;
				}
			}
		}
		try {
			String[] refs = mail.getHeader("References");
			if( refs != null ) {
				for (String ref : refs) {
					List<String> ids = MailSubsystem.stripMultiBrackets(ref);
					if( !ids.isEmpty() ) {
						String last = ids.get(ids.size()-1);
						if( !(last.equals(messageID) || last.equals("")) )
							return last;
					}
				}
			}
		} catch(MailParseException e) {
			logger.warn("screwed up References for messageID="+messageID,e);
		}
		return null;
	}

	/**
	 * Guesses a parent for a new mail while excluding every descendant of the given orphans
	 * from the candidates.
	 */
	private static NodeImpl guessParent(Mail mail, Date date, MailingListImpl mailingList, String subject, NodeImpl[] orphans)
		throws IOException
	{
		Set<NodeImpl> offspring = new HashSet<NodeImpl>();
		for (NodeImpl orphan : orphans) {
			for (NodeImpl n : orphan.getDescendantImpls()) {
				offspring.add(n);
			}
		}
return guessParent(mail, date, mailingList, subject, offspring); } static NodeImpl guessParent(NodeImpl post, Collection<NodeImpl> ignore) { Mail mail = MailHome.newMail(post.getMessage().getRaw()); NodeImpl forum = post.getAppImpl(); if (forum == null) { return null; // detached post } MailingListImpl mailingList = forum.getAssociatedMailingListImpl(); if (mailingList == null) { return null; // forum no longer a mailing list } try { Set<NodeImpl> ignoreSet = new HashSet<NodeImpl>(); for( NodeImpl n : post.getDescendantImpls() ) { ignoreSet.add(n); } if (ignore != null) { ignoreSet.addAll(ignore); } return guessParent(mail, post.getWhenCreated(), mailingList, post.getSubject(), ignoreSet); } catch (IOException e) { throw new RuntimeException(e); } } private static NodeImpl guessParent(Mail mail, Date date, MailingListImpl mailingList, String subject, Set<NodeImpl> offspring) throws IOException { // attach to ancestor if any NodeImpl forum = mailingList.getForumImpl(); try { String[] refs = mail.getHeader("References"); if( refs != null ) { for( String ref : refs ) { final List<String> list = MailSubsystem.stripMultiBrackets(ref); for( int i=list.size()-1; i>=0; i-- ) { String ancestorID = list.get(i); NodeImpl parent = forum.getNodeImplFromMessageID(ancestorID); if (parent!=null && !offspring.contains(parent) && !threadBySubject(forum,subject,parent.getSubject())) { logger.debug("Attaching orphan "+mail.getMessageID()+" to grandparent "+parent); return parent; } } } } } catch(MailParseException e) { logger.warn("screwed up References",e); } // handle lost In-Reply-To headers // heuristics - use Thread-Topic header to find matching subjects in last 3 days String[] threadTopics = mail.getHeader("Thread-Topic"); String threadTopic = threadTopics==null?null:mailingList.fixSubject(threadTopics[0]); long forumId = forum.getId(); Filter filter = Lucene.getRangeFilter(DateUtils.addDays(date, -7), date); SiteImpl site = forum.getSiteImpl(); LuceneSearcher searcher = 
Lucene.newSearcher(site); try { if( threadTopic != null ) { threadTopic = threadTopic.toLowerCase(); if (threadTopic.startsWith("re: ")) threadTopic = threadTopic.substring(4); threadTopic = threadTopic.trim(); if (!threadTopic.equals("")) { NodeImpl parent = (NodeImpl)getPriorPost(site,searcher, forumId, threadTopic, filter, date, offspring); if( parent!=null && !offspring.contains(parent) && !threadBySubject(forum,subject,parent.getSubject())) return parent; } } // if no thread-topic, but subject starts with Re:, try with subject subject = subject.toLowerCase(); if( subject.startsWith("re: ") ) { subject = subject.substring(4).trim(); if ( !subject.equals(threadTopic) && !"".equals(subject) ) { NodeImpl parent = (NodeImpl)getPriorPost(site,searcher, forumId, subject, filter, date, offspring); if( parent!=null && !offspring.contains(parent)) return parent; } } } finally { searcher.close(); } return null; } private static boolean checkPending(Mail mail, MailingListImpl ml) { if (checkPending(getMessageID(mail, msgFmt), ml)) { return true; } String[] xMessageId = mail.getHeader("X-Message-Id"); if (xMessageId != null && xMessageId.length > 0 && checkPending(MailSubsystem.stripBrackets(xMessageId[0]), ml)) { return true; } xMessageId = mail.getHeader("X-Original-Message-Id"); if (xMessageId != null && xMessageId.length > 0 && checkPending(MailSubsystem.stripBrackets(xMessageId[0]), ml)) { return true; } return false; } private static boolean checkPending(String messageID, MailingListImpl ml) { NodeImpl pendingPost = ml.getForumImpl().getNodeImplFromMessageID(messageID); if( pendingPost==null ) return false; Node.MailToList mail = pendingPost.getMailToList(); if( mail == null ) { logger.warn("MessageID "+messageID+" already in db as "+pendingPost+" for forum "+ml.getId()); } else if( !mail.isPending() ) { logger.error("post not pending "+pendingPost); } else { mail.clearPending(); } return true; } private static boolean doNotArchive(Mail mail) { String[] xNoArchive = 
mail.getHeader("X-No-Archive"); if (xNoArchive != null && xNoArchive.length > 0 && "yes".equalsIgnoreCase(xNoArchive[0])) { return true; } String[] xArchive = mail.getHeader("X-Archive"); if (xArchive != null && xArchive.length > 0 && xArchive[0] != null && (xArchive[0].startsWith("expiry") || "no".equalsIgnoreCase(xArchive[0]))) { return true; } String[] archive = mail.getHeader("Archive"); if (archive != null && archive.length > 0 && "no".equalsIgnoreCase(archive[0])) { return true; } return false; } private static final Pattern xNoArchivePtn = Pattern.compile("(?im)\\AX-No-Archive: yes *$"); private static boolean doNotArchive(String text) { return xNoArchivePtn.matcher(text).find(); } private static MailingListImpl getMailingList(Mail mail) { MailingListImpl ml = null; String[] a = mail.getHeader("Envelope-To"); if (a == null) a = mail.getHeader("X-Delivered-to"); // fastmail if (a == null) a = mail.getHeader("X-Original-To"); // postfix if( a.length > 1 ) a = new String[] { a[0] }; for( String address : a[0].split(",") ) { address = address.trim(); MailingListImpl candidate = MailingListImpl.getMailingListByEnvelopeAddress(address); if (candidate == null) { // escaped list mail, bounce mail String returnPath = MailSubsystem.getReturnPath(mail); if( returnPath.equals(address) ) continue; // ignore spam MailSubsystem.bounce(mail, "Delivery to the following recipient failed permanently:\n\n " + address + "\n\nNo archive exists for this address.\n" ); logger.warn( "no mailing list found for "+address+" - bouncing mail to "+returnPath + ":\n" + mail); } else { if( ml != null ) logger.error("mailing list already set"); ml = candidate; } } return ml; } private static String extractDomain(String email) { String domain = email.substring(email.indexOf('@')+1).toLowerCase(); // hack to unify google messages return domain.replace("google.com","googlegroups.com"); } private static boolean checkForward(Mail mail, MailingListImpl ml) { // check if the archive guessed from 
// subscription address is not presented in the
		// common headers that contain list address, forward to the archive owner in this case
		String envTo[] = mail.getHeader("Envelope-To");
		if (envTo == null)
			envTo = mail.getHeader("X-Delivered-to");  // fastmail
		if (envTo == null)
			envTo = mail.getHeader("X-Original-To");  // postfix
		String originalTo = envTo[0];
		{
			// Only mail whose single To: address is the envelope address itself is examined;
			// anything else is treated as regular list traffic.
			MailAddress[] to = mail.getTo();
			if( to==null || to.length!=1 || !to[0].getAddrSpec().equalsIgnoreCase(originalTo) )
				return false;
		}

		// check for domain of the message's From: or Reply-To:
		String listAddress = ml.getListAddress();
		String domain = extractDomain(listAddress);
		String maintenanceMessageReplyTo = null;
		{
			// first Reply-To address whose domain is a suffix of the list domain or vice versa
			MailAddress[] replyTos = mail.getReplyTo();
			if (replyTos != null) {
				for (MailAddress replyTo : replyTos) {
					String replyDomain = extractDomain(replyTo.getAddrSpec());
					if (replyDomain.endsWith(domain) || domain.endsWith(replyDomain)) {
						maintenanceMessageReplyTo = replyTo.getAddrSpec();
						break;
					}
				}
			}
		}
		MailAddress from = mail.getFrom();
		// first we compare the domains
		if( maintenanceMessageReplyTo == null && from != null
			&& (extractDomain(from.getAddrSpec()).endsWith(domain) || domain.endsWith(extractDomain(from.getAddrSpec()))))
			maintenanceMessageReplyTo = from.getAddrSpec();
		// check if this is a majordomo email
		if (maintenanceMessageReplyTo == null && from != null && from.getAddrSpec().toLowerCase().startsWith("majordomo@"))
			maintenanceMessageReplyTo = from.getAddrSpec();
		if( maintenanceMessageReplyTo != null ) {
			// Maintenance message from the list server: rewrite Reply-To with fwdEmail(...)
			// and pass the mail on to the archive owner.
			mail.setReplyTo( new MailAddress(fwdEmail(originalTo, maintenanceMessageReplyTo) ));
			MailAddress ownerAddress = getArchiveOwnerAddress(ml);
			MailHome.getDefaultSmtpServer().send(mail,ownerAddress);
			logger.info("Forwarding maintenance message to owner: " + ownerAddress);
			logger.info(mail.getRawInput());
			return true;
		}
		if( MailSubsystem.getReturnPath(mail).equals("") ) {
			// Empty return path: the mail is forwarded to the owner as well, Reply-To untouched.
			MailAddress ownerAddress = getArchiveOwnerAddress(ml);
			MailHome.getDefaultSmtpServer().send(mail,ownerAddress);
logger.info("Forwarding maintenance message to owner: " + ownerAddress); logger.info(mail.getRawInput()); return true; } logger.info("Bouncing email to: " + MailSubsystem.getReturnPath(mail) + " / envelopeTo = " + originalTo + "\n" + mail.getRawInput()); MailSubsystem.bounce(mail, "Delivery to the following recipient failed permanently:\n\n " + originalTo + "\n\nThis email address is only for archiving mailing lists and should not be used directly.\n" ); return true; } private static MailAddress getArchiveOwnerAddress(MailingListImpl mailingList) { // If this list was exported to another server, we have to send this email // to the person that did the export. Otherwise we send to the current owner. String exportOwner = mailingList.getExportOwner(); if (exportOwner == null) { // Send to the current owner... User owner = mailingList.getForumImpl().getOwnerImpl(); return new MailAddress(owner.getEmail(), owner.getName()); } else { // Send to the person who exported the archive... return new MailAddress(exportOwner); } } private static MailAddress toMailAddress(String s) { try { InternetAddress ia = new InternetAddress(s); return new MailAddress(ia.getAddress(),ia.getPersonal()); } catch(AddressException e) { return null; } } private static UserImpl getUser(Mail mail, MailingListImpl mailingList) { MailAddress addr = null; String a[] = mail.getHeader("X-Original-From"); if( a != null ) addr = toMailAddress(a[0]); if( addr == null ) addr = mail.getFrom(); String email = addr.getAddrSpec(); if (email == null || "".equals(email.trim())) { throw new MailAddressException("Invalid sender address: "+addr); } SiteImpl site = mailingList.getForumImpl().getSiteImpl(); UserImpl user = site.getUserImplFromEmail(email); if( user==null || !user.isRegistered() ) { String username; if( email.equalsIgnoreCase(mailingList.getListAddress()) ) { username = mailingList.getForum().getSubject() + " mailing list"; } else { username = addr.getDisplayName(); if( username == null || 
"".equals(username.trim()) ) { username = email.indexOf('@')>0 ? email.substring(0, email.indexOf('@')) : email; } } if( username.endsWith(" (JIRA)") ) { username = "JIRA "+email; } if( user==null ) { user = UserImpl.createGhost(site,email); user.setNameLike(username,false); user.insert(); } else { String oldName = user.getName(); if( !oldName.toLowerCase().startsWith(username.toLowerCase()) && (Math.random() < nameChangeFreq) ) { user.setNameLike(username,false); user.getDbRecord().update(); logger.warn("changed name of "+user+" from '"+oldName+"' to '"+user.getName()+"'"); } } } return user; } static final MailMessageFormat msgFmt = new MailMessageFormat('m', "mail"); private static Node getPriorPost(final SiteImpl site,final LuceneSearcher searcher, long forumId, final String subject, Filter filter, final Date to, final Set offspring) throws IOException { //String phrase = "\""+QueryParser.escape(subject.replace('\"',' '))+"\""; try { NodeSearcher.Builder query = new NodeSearcher.Builder(site,forumId); query.addNodeKind(Node.Kind.POST); QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,Lucene.SUBJECT_FLD, Lucene.analyzer); parser.setDefaultOperator(QueryParser.AND_OPERATOR); Query subjectQuery = parser.parse(QueryParser.escape(subject.replace('\"',' ').replace("&&"," "))); if (! 
(subjectQuery instanceof BooleanQuery && ((BooleanQuery)subjectQuery).getClauses().length==0) ) query.addQuery(subjectQuery); final Node[] resultHolder = new Node[1]; searcher.search( query.build().getQuery(), filter, new HitCollector() { protected void process(Document doc) { NodeImpl post = Lucene.node(site,doc); if (post==null) return; String parentSubject = post.getSubject().toLowerCase(); if ( (parentSubject.equals(subject) || (parentSubject.startsWith("re: ") && parentSubject.substring(4).trim().equals(subject))) && to.after(post.getWhenCreated()) && (resultHolder[0]==null || resultHolder[0].getWhenCreated().before(post.getWhenCreated())) && !offspring.contains(post) ) resultHolder[0] = post; } }); Node result = resultHolder[0]; if (result != null) { // find the uppermost post with almost-the-same subject String subjectEtalon = normalizeSubject(subject.toLowerCase()); Node resultCandidate = result.getTopic(); while (result != null) { String resultSubject = normalizeSubject(result.getSubject().toLowerCase()); if (!resultSubject.equals(subjectEtalon)) break; // break when subject really changes // this post has almost-the-same subject if (!offspring.contains(result)) { // set only if this node is not presented in escape-set resultCandidate = result; } result = result.getParent(); } result = resultCandidate; } return result; } catch (ParseException e) { throw new RuntimeException(e); } } private static final Pattern bracketRegex = Pattern.compile("\\[[^\\[]+\\]"); static Pattern prefixRegex(String prefixes) { return Pattern.compile( "^((" + prefixes + "): *)+" ); } private static final Pattern defaultPrefixRegex = prefixRegex("re|aw|res|fwd|答复"); /** * Remove from subject all possible prefixes which are added while forwarding, replying, etc... 
*
	 * @param subject original subject
	 * @return normalized subject
	 */
	private static String normalizeSubject(String subject) {
		return normalizeSubject(subject,defaultPrefixRegex);
	}

	/**
	 * Normalizes a subject for comparison: lower-cases it, removes every "[...]" tag and then
	 * the leading markers matched by prefixRegex.
	 *
	 * @param subject original subject, may be null
	 * @param prefixRegex pattern of the leading markers to strip, anchored at the start
	 * @return the normalized subject, or null when subject is null
	 */
	static String normalizeSubject(String subject,Pattern prefixRegex) {
		if (subject != null) {
			subject = subject.toLowerCase().trim();
			// Removing a tag leaves the blank that followed or preceded it.  Trim again so that
			// "[tag] re: x" still starts with "re:" for the anchored prefix pattern and so that
			// "[tag] x" and "re: [tag] x" normalize to the same text.
			subject = bracketRegex.matcher(subject).replaceAll("").trim();
			subject = prefixRegex.matcher(subject).replaceAll("");
		}
		return subject;
	}

	// Intentionally empty.
	static void nop() {}

	/**
	 * Detects the list server software from well-known headers of a list mail.
	 *
	 * @return the detected server, or null when nothing conclusive is found
	 */
	private static ListServer detectListServer(Mail mail) {
		String[] mailman = mail.getHeader("X-Mailman-Version");
		if (mailman!=null && mailman.length==1 && mailman[0]!=null) {
			if (mailman[0].startsWith("2.0")) {
				return ListServer.mailman20;
			} else if (mailman[0].startsWith("2.1")) {
				return ListServer.mailman21;
			} else if (mailman[0].startsWith("2.")) {
				logger.error("unknown mailman version: "+mailman[0]+" in message "+mail.getMessageID());
			}
			return null;
		}

		String[] mList = mail.getHeader("Mailing-List");
		if (mList!=null && mList.length==1 && mList[0]!=null) {
			if (mList[0].indexOf("run by ezmlm")>=0) {
				return ListServer.ezmlm;
			} else if (mList[0].indexOf("@yahoogr")>0 || mList[0].indexOf("@gruposyahoo")>0) {
				return ListServer.yahoo;
			} else if (mList[0].indexOf("@googlegroups")>0) {
				return ListServer.google;
			}
		}

		String[] listproc = mail.getHeader("X-Listprocessor-Version");
		if (listproc!=null && listproc.length==1 && listproc[0]!=null) {
			if (listproc[0].indexOf("ListProc")>=0) {
				if (listproc[0].indexOf("CREN")>=0) {
					return ListServer.listproc;
				} else {
					return ListServer.oldlistproc;
				}
			} else {
				logger.error("unknown listproc version: "+listproc[0]+" in message "+mail.getMessageID());
				return null;
			}
		}

		String[] ecartis = mail.getHeader("X-ecartis-version");
		if (ecartis!=null && ecartis.length==1 && ecartis[0]!=null) {
			if (ecartis[0].indexOf("Ecartis")>=0) {
				return ListServer.ecartis;
			} else {
				logger.error("unknown ecartis version: "+ecartis[0]+" in message "+mail.getMessageID());
				return null;
			}
		}

		String[] lyris =
mail.getHeader("X-LISTMANAGER-Message-Id"); if (lyris!=null && lyris.length==1 && lyris[0]!=null) { if (lyris[0].indexOf("LISTMANAGER")>=0) { return ListServer.lyris; } else { logger.error("unexpected x-listmanager-message-id header: "+lyris[0]+" in message "+mail.getMessageID()); return null; } } String[] xListServer = mail.getHeader("X-ListServer"); if (xListServer!=null && xListServer.length==1 && xListServer[0]!=null) { if (xListServer[0].indexOf("CommuniGate")>=0) { return ListServer.communigate; } else { logger.error("unknown x-listserver header: "+xListServer[0]+" in message "+mail.getMessageID()); return null; } } // may not be reliable String[] listSubscribe = mail.getHeader("List-Subscribe"); if (listSubscribe!=null && listSubscribe.length==1 && listSubscribe[0]!=null) { if (listSubscribe[0].indexOf("+subscribe@")>0) { return ListServer.mlmmj; } else if (listSubscribe[0].indexOf("listserver@")>=0) { return ListServer.listserver; } else if (listSubscribe[0].indexOf("sympa@")>=0) { return ListServer.sympa; } } // not possible to detect: // listserv // majordomo / majordomo2 // smartlist return null; } /* static void redoGuessedParents() {} public static void rethreadPosts(boolean inBatch) throws SQLException{ rethreadPosts(1, 0, inBatch); } public static void rethreadPosts(long startingPostId, boolean inBatch) throws SQLException{ rethreadPosts(startingPostId, 0, inBatch); } static void rethreadPosts(long startingPostId, long forumId, boolean inBatch) throws SQLException{ Logger batchLog = inBatch ? Batch.logger : logger; if(startingPostId > 1) batchLog.info("Starting rethread from post " + startingPostId); if(forumId > 0) batchLog.info("Rethreading for forum " + forumId); //the WHERE condition is post_id > postId ! 
long postId = startingPostId - 1; int processed_post_count = 0; int modified_post_count = 0; int null_parent_count = 0; NodeImpl post = null; boolean more = true; try { outer: while (more) { Connection con = Db.db.getConnection(); try { con.setAutoCommit(false); PreparedStatement stmt = con.prepareStatement( (forumId > 0) ? "SELECT * FROM descendants(" + forumId + ") WHERE node_id > ? AND " + "(guessed_parent='t' OR guessed_parent is null) AND " + "msg_fmt='m'" + "ORDER BY node_id LIMIT 1" : "SELECT * FROM node WHERE node_id > ? AND " + "(guessed_parent='t' OR guessed_parent is null) AND " + "msg_fmt='m'" + "ORDER BY node_id LIMIT 1" ); stmt.setLong(1, postId); ResultSet rs = stmt.executeQuery(); more = false; while (rs.next()) { more = true; post = NodeImpl.getNode(rs); try { postId = post.getId(); Mail mail = MailHome.newMail(post.getMessage().getRaw()); MailingListImpl mailingList = post.getAppImpl().getAssociatedMailingListImpl(); if (mailingList==null) continue; // forum no longer a mailing list List<NodeImpl> descendants = new ArrayList<NodeImpl>(); for( NodeImpl n : post.getDescendantImpls() ) { descendants.add(n); } NodeImpl parent = guessParent(mail, post.getWhenCreated(), mailingList, post.getSubject(), descendants.toArray(new NodeImpl[0])); if (parent != null && parent.getId() != post.getParentId()) { try { batchLog.debug("setting parent of " + post + " to " + parent); post.setGuessedParent(parent); if(++ modified_post_count % 100 == 0) batchLog.info("Modified " + modified_post_count + " posts"); } catch (ModelException.NodeLoop e) { batchLog.error("",e); } } else if(parent == null && post.getParentId() != 0 && post.getParent().getKind()!=Node.Kind.APP){ batchLog.info("Null parent at " + post); null_parent_count ++; } } catch(Exception x){ batchLog.error("Exception at post " + post, x); break outer; } if(++ processed_post_count % 3000 == 0) batchLog.info("Processed " + processed_post_count + " posts, current postId: "+postId); if (inBatch) { 
Batch.checkStopped(); } } stmt.close(); con.commit(); } finally { con.close(); } } } finally { batchLog.info("Exited at post " + post); batchLog.info("Processed " + processed_post_count + " posts"); batchLog.info("Modified " + modified_post_count + " posts"); batchLog.info("Guessed null parent at " + null_parent_count + " posts"); } } */ private static void getRethreadIds(Connection con,long parentId,Collection<Long> ids) throws SQLException { PreparedStatement stmt = con.prepareStatement( "select node_id, guessed_parent, msg_fmt from node where parent_id = ?" ); stmt.setLong(1,parentId); ResultSet rs = stmt.executeQuery(); while( rs.next() ) { long id = rs.getLong("node_id"); if( "m".equals(rs.getString("msg_fmt")) && ( rs.getBoolean("guessed_parent") || rs.wasNull() ) ) ids.add(id); getRethreadIds(con,id,ids); } rs.close(); stmt.close(); } static void rethreadForum(NodeImpl forum, boolean inBatch) throws SQLException{ long forumId = forum.getId(); long rethreadStart = System.currentTimeMillis(); Logger batchLog = inBatch ? Batch.logger : logger; batchLog.info("Rethreading for forum " + forumId); SiteKey siteKey = forum.getSiteImpl().siteKey; DbDatabase db = siteKey.getDb(); Collection<Long> ids = new ArrayList<Long>(); { Connection con = db.getConnection(); long queryStart = System.currentTimeMillis(); getRethreadIds(con,forumId,ids); batchLog.info("Query took " + (System.currentTimeMillis() - queryStart) + " ms"); con.close(); } batchLog.info(ids.size() + " posts to process..."); //the WHERE condition is post_id > postId ! 
int processed_post_count = 0; int modified_post_count = 0; int null_parent_count = 0; try { while (ids.size() > 0) { Connection con = db.getConnection(); try { con.setAutoCommit(false); long id = ids.iterator().next(); ids.remove(id); NodeImpl post = NodeImpl.getNode(siteKey,id); try { Mail mail = MailHome.newMail(post.getMessage().getRaw()); MailingListImpl mailingList = post.getAppImpl().getAssociatedMailingListImpl(); if (mailingList==null) continue; // forum no longer a mailing list List<NodeImpl> descendants = new ArrayList<NodeImpl>(); for( NodeImpl n : post.getDescendantImpls() ) { descendants.add(n); } NodeImpl parent = guessParent(mail, post.getWhenCreated(), mailingList, post.getSubject(), descendants.toArray(new NodeImpl[0])); if (parent != null && parent.getId() != post.getParentId()) { try { batchLog.info("setting parent of " + post + " to " + parent); post.setGuessedParent(parent); if(++ modified_post_count % 1000 == 0) batchLog.info("Modified " + modified_post_count + " posts"); } catch (ModelException.NodeLoop e) { batchLog.error("",e); } } else if(parent == null && post.getParentId() != 0 && post.getParent().getKind()!=Node.Kind.APP){ batchLog.info("Null parent at " + post); null_parent_count ++; } } catch(Exception x){ batchLog.error("Exception at " + post + " - message:\n"+post.getMessage().getRaw(), x); break; } if(++ processed_post_count % 1000 == 0) batchLog.info("Processed " + processed_post_count + " posts, current postId: "+id); if (inBatch) { Batch.checkStopped(); } con.commit(); } finally { con.close(); } } } finally { batchLog.info("Rethread took " + (System.currentTimeMillis() - rethreadStart) + " ms"); batchLog.info("Processed " + processed_post_count + " posts"); batchLog.info("Modified " + modified_post_count + " posts"); batchLog.info("Guessed null parent at " + null_parent_count + " posts"); } } /** * Get or create message id from an email * * @param mail email message * @param msgFmt message format to use * @return message id, 
never null */ private static String getMessageID(Mail mail, Message.Format msgFmt) { String[] messageIds = mail.getHeader("Message-Id"); // returns both Id and ID if (messageIds == null || messageIds.length == 0 || messageIds[messageIds.length - 1] == null) { return calcMessageID(mail, msgFmt); } else { return MailSubsystem.stripBrackets(messageIds[messageIds.length - 1]); } } /** * Create a new message if for an email message * * @param mail mail message to process * @param msgFmt message format to use * @return a new message id, never null */ private static String calcMessageID(Mail mail, Message.Format msgFmt) { StringBuilder msgId = new StringBuilder(); msgId.append("MissingID."); String text = msgFmt.getText(mail.getRawInput(),null); msgId.append(Integer.toHexString(text.hashCode())); MailAddress from = mail.getFrom(); if (from != null) msgId.append(Integer.toHexString(from.toString().hashCode())); MailAddress[] to = mail.getTo(); if (to != null && to.length > 0) msgId.append(Integer.toHexString(to[0].toString().hashCode())); Date date = mail.getSentDate(); if (date != null) msgId.append(Integer.toHexString(date.hashCode())); String subject = mail.getSubject(); if (subject != null) msgId.append(Integer.toHexString(subject.hashCode())); msgId.append("@nabble.com"); return msgId.toString(); } private static final Pop3Server fwdPop3Server = (Pop3Server)Init.get("fwdPop3Server"); private static class Lazy { static final String emailPrefix; static final String emailSuffix; static final Pattern pattern; static { String addrSpec = fwdPop3Server.getUsername(); int ind = addrSpec.indexOf('@'); emailPrefix = addrSpec.substring(0, ind) + "+"; emailSuffix = addrSpec.substring(ind); pattern = Pattern.compile( Pattern.quote(emailPrefix) + "([^@]+)\\+([^@]+)\\+([^@]+)" + Pattern.quote(emailSuffix) , Pattern.CASE_INSENSITIVE ); } } private static void processFwds() { if( fwdPop3Server == null ) { logger.error("fwdPop3Server not defined"); System.exit(-1); } MailIterator mails 
= fwdPop3Server.getMail(); try { while( mails.hasNext() ) { Mail mail = mails.next(); try { fwdMail(mail); } catch (Exception e) { logger.error("mail:\n"+mail.getRawInput(),e); } } } finally { mails.close(); } } private static void fwdMail(Mail mail) { String[] envTo = mail.getHeader("Envelope-To"); if (envTo == null) envTo = mail.getHeader("X-Delivered-to"); // fastmail if (envTo == null) envTo = mail.getHeader("X-Original-To"); // postfix String originalTo = envTo[0]; Matcher matcher = Lazy.pattern.matcher(originalTo); if( !matcher.matches() ) throw new RuntimeException("invalid email: "+originalTo); String fwdFrom = emailDecode(matcher.group(1)); String fwdTo = emailDecode(matcher.group(2)); if( (fwdFrom+fwdTo).hashCode() != Integer.parseInt(matcher.group(3)) ) throw new RuntimeException("invalid hash: "+originalTo); mail.setFrom(new MailAddress(fwdFrom)); mail.setTo(new MailAddress(fwdTo)); logger.info("Forwarding email to mailing list: " + fwdTo); MailHome.getDefaultSmtpServer().send(mail); } private static String fwdEmail(String from,String to) { return Lazy.emailPrefix + emailEncode(from) + '+' + emailEncode(to) + '+' + (from+to).hashCode() + Lazy.emailSuffix; } private static String emailEncode(String s) { return HtmlUtils.urlEncode(s).replace('%','~'); } private static String emailDecode(String s) { return HtmlUtils.urlDecode(s.replace('~','%')); } }