annotate src/global/Site.java @ 34:61800d34be0d

whitelist
author Franklin Schmidt <fschmidt@gmail.com>
date Tue, 07 Jul 2020 09:44:17 -0600
parents abe0694e9849
children 56accc959f8c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
1 package global;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
2
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
3 import fschmidt.util.java.HtmlUtils;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
4 import nabble.view.lib.ViewUtils;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
5 import nabble.model.Init;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
6 import org.apache.lucene.analysis.Analyzer;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
7 import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
8 import org.apache.lucene.document.Document;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
9 import org.apache.lucene.document.Field;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
10 import org.apache.lucene.document.NumericField;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
11 import org.apache.lucene.index.IndexReader;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
12 import org.apache.lucene.index.IndexWriter;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
13 import org.apache.lucene.index.Term;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
14 import org.apache.lucene.search.Sort;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
15 import org.apache.lucene.search.SortField;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
16 import org.apache.lucene.store.FSDirectory;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
17 import org.apache.lucene.util.Version;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
18 import org.slf4j.Logger;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
19 import org.slf4j.LoggerFactory;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
20
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
21 import java.io.File;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
22 import java.io.IOException;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
23 import java.io.BufferedReader;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
24 import java.io.FileReader;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
25 import java.sql.Connection;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
26 import java.sql.ResultSet;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
27 import java.sql.SQLException;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
28 import java.sql.Statement;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
29 import java.util.ArrayList;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
30 import java.util.Date;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
31 import java.util.List;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
32 import java.util.Map;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
33 import java.util.HashMap;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
34 import java.util.Set;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
35 import java.util.HashSet;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
36 import java.util.Arrays;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
37 import java.util.regex.Pattern;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
38 import java.util.regex.Matcher;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
39
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
40
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
41 public final class Site {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
42 private static final Logger logger = LoggerFactory.getLogger(Site.class);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
43
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
44 public static volatile String status = "";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
45
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
46 public static final String SERVER_FLD = "server";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
47 public static final String SITE_FLD = "site";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
48 public static final String DOMAIN_FLD = "domain";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
49 public static final String SUBJECT_FLD = "subject";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
50 public static final String MESSAGE_FLD = "message";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
51 public static final String TYPE_FLD = "type";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
52 public static final String ACTIVITY_FLD = "activity";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
53 public static final String EMBARRASSING_FLD = "embarrassing";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
54 public static final String PRIVATE_FLD = "private";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
55 public static final String OWNER_EMAIL_FLD = "owner_email";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
56 public static final String OWNER_EMAIL_DOMAIN_FLD = "owner_email_domain";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
57 public static final String NODE_COUNT_FLD = "node_count";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
58 public static final String WHEN_CREATED_FLD = "when_created";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
59 public static final String TWEAKS_FLD = "tweaks";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
60 public static final String HAS_TWEAKS_FLD = "has_tweaks";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
61 public static final String FILE_COUNT_FLD = "file_count";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
62 public static final String FILE_NODE_RATIO_FLD = "file_node_ratio";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
63 public static final String MONTHLY_VIEWS_FLD = "monthly_views";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
64 public static final String VALUE_FLD = "value";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
65
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
66 public static final Sort SORT_BY_ACTIVITY = new Sort(new SortField(ACTIVITY_FLD, SortField.INT, true));
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
67 public static final Sort SORT_BY_FILE_NODE_RATIO = new Sort(new SortField(FILE_NODE_RATIO_FLD, SortField.INT, true));
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
68 public static final Sort SORT_BY_VALUE = new Sort(
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
69 new SortField(VALUE_FLD, SortField.INT, true),
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
70 new SortField(NODE_COUNT_FLD, SortField.INT, true)
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
71 );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
72
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
73 public static final Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_CURRENT,"English");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
74 private static final FSDirectory dir1;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
75 private static final FSDirectory dir2;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
76 private static volatile Thread thread = null;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
77 private static final boolean skipReindex1 = Init.get("skipReindex1",false);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
78
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
79 static {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
80 try {
2
abe0694e9849 replace local_dir with home_dir
Franklin Schmidt <fschmidt@gmail.com>
parents: 0
diff changeset
81 String localDir = (String)Init.get("home_dir")+"local/";
0
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
82 dir1 = FSDirectory.open(new File(localDir+"lucene_raw"));
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
83 dir2 = FSDirectory.open(new File(localDir+"lucene_global"));
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
84 } catch(IOException e) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
85 throw new RuntimeException(e);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
86 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
87 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
88
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
89 public static FSDirectory dir() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
90 return dir2;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
91 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
92 /*
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
93 static synchronized void clear()
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
94 throws IOException
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
95 {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
96 new IndexWriter(dir,analyzer,true,IndexWriter.MaxFieldLength.LIMITED).close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
97 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
98 */
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
99 public static boolean isReindexing() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
100 return thread != null && thread.isAlive();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
101 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
102
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
103 public static void startReindexing(){
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
104 status = "starting reindex";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
105 thread = new Thread(new Runnable(){public void run(){
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
106 try {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
107 reindex();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
108 } catch(SQLException e) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
109 logger.error("",e);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
110 status = "Error: " + e.getMessage();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
111 } catch(IOException e) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
112 logger.error("",e);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
113 status = "Error: " + e.getMessage();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
114 } catch(RuntimeException e) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
115 logger.error("",e);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
116 status = "Error: " + e.getMessage();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
117 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
118 }},"reindex");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
119 thread.start();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
120 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
121
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
122 static synchronized void reindex()
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
123 throws SQLException, IOException
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
124 {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
125 if( !skipReindex1 )
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
126 reindex1();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
127 status = "done indexing servers";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
128 logger.info("reindex2");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
129 reindex2();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
130 logger.info("done reindexing");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
131 status = "done reindexing";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
132 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
133
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
134 private static void reindex1()
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
135 throws SQLException, IOException
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
136 {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
137 IndexWriter indexWriter = new IndexWriter(dir1,analyzer,true,IndexWriter.MaxFieldLength.LIMITED);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
138 try {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
139 for( Server server : Server.getServers() ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
140 reindex1(server,indexWriter);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
141 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
142 } finally {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
143 indexWriter.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
144 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
145 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
146 /*
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
147 public static synchronized void removeBySubject(String subject)
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
148 throws IOException
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
149 {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
150 Term term = new Term( SUBJECT_FLD, subject );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
151 IndexWriter indexWriter = new IndexWriter(dir2,analyzer,IndexWriter.MaxFieldLength.LIMITED);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
152 indexWriter.deleteDocuments(term);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
153 indexWriter.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
154 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
155 */
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
156 private static synchronized void reindex1(Server server,IndexWriter indexWriter)
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
157 throws SQLException, IOException
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
158 {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
159 logger.info("reindex "+server.name);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
160 status = "reindexing "+server.name;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
161
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
162 List<Long> siteIds = new ArrayList<Long>();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
163 {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
164 Connection con = server.getConnection();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
165 Statement stmt = con.createStatement();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
166 ResultSet rs = stmt.executeQuery(
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
167 "select site_id from global.site_global"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
168 );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
169 while( rs.next() ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
170 long siteId = rs.getLong("site_id");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
171 siteIds.add(siteId);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
172 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
173 rs.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
174 stmt.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
175 con.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
176 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
177 final int n = siteIds.size();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
178 int count = 0;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
179 for( long siteId : siteIds ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
180 update(indexWriter,server,siteId);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
181 count++;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
182 logger.info("reindexed "+count+" of "+n+" sites on "+server.name);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
183 status = "reindexed "+count+" of "+n+" sites on "+server.name;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
184 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
185 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
186
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
187 private static void update(IndexWriter indexWriter,Server server,long siteId)
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
188 throws IOException, SQLException
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
189 {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
190 Connection con = server.getConnection();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
191 try {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
192 String schema = "s" + siteId;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
193 Statement stmt = con.createStatement();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
194 ResultSet rs = stmt.executeQuery(
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
195 "select site_global.*, site.*, node.*, node_msg.message, priv.label as priv, user_.email, tweak.tweaks"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
196 +", (select count(*) from " + schema + ".file_node) as file_node_count"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
197 +" from global.site_global"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
198 +", " + schema + ".site"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
199 +" join " + schema + ".node on site.root_node_id = node.node_id"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
200 +" join " + schema + ".node_msg on site.root_node_id = node_msg.node_id"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
201 +" left join " + schema + ".tag as priv on site.root_node_id = priv.node_id and priv.user_id is null and priv.label='permission:View'"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
202 +" left join " + schema + ".user_ on node.owner_id = user_.user_id"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
203 +" , (select string_agg(content,' ') as tweaks from " + schema + ".tweak) as tweak"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
204 +" where site_global.site_id = " + siteId
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
205 );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
206 if( !rs.next() ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
207 logger.error("site not found: "+siteId);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
208 return;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
209 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
210 Document doc = new Document();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
211 doc.add( new Field(SERVER_FLD, server.name, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
212 doc.add( new Field(SITE_FLD, Long.toString(siteId), Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
213 String subject = rs.getString("subject");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
214 String domain = rs.getString("custom_domain");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
215 if( domain == null )
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
216 domain = ViewUtils.getDefaultBaseUrl(siteId,subject,server.host);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
217 doc.add( new Field(DOMAIN_FLD, domain, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
218 doc.add( new Field(SUBJECT_FLD, subject, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
219 String message = rs.getString("message");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
220 doc.add( new Field(MESSAGE_FLD, message, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
221 String type = "" + rs.getString("type");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
222 doc.add( new Field(TYPE_FLD, type, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
223 int activity = rs.getInt("activity");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
224 doc.add( new NumericField(ACTIVITY_FLD,Field.Store.YES,false).setIntValue(activity) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
225 String embarrassing = Boolean.toString( rs.getBoolean("is_embarrassing") );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
226 doc.add( new Field(EMBARRASSING_FLD, embarrassing, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
227 String privS = rs.getString("priv");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
228 String priv = Boolean.toString( privS != null );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
229 doc.add( new Field(PRIVATE_FLD, priv, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
230 String email = rs.getString("email");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
231 if( email != null ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
232 doc.add( new Field(OWNER_EMAIL_FLD, email, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
233 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
234 int nodeCount = rs.getInt("node_count");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
235 doc.add( new NumericField(NODE_COUNT_FLD,Field.Store.YES,false).setIntValue(nodeCount) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
236 Date whenCreated = rs.getTimestamp("when_created");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
237 doc.add( new NumericField(WHEN_CREATED_FLD,Field.Store.YES,false).setLongValue(whenCreated.getTime()) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
238 String tweaks = rs.getString("tweaks");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
239 if( tweaks != null ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
240 doc.add( new Field(TWEAKS_FLD, tweaks, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
241 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
242 int fileCount = rs.getInt("file_node_count");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
243 doc.add( new NumericField(FILE_COUNT_FLD,Field.Store.YES,false).setIntValue(fileCount) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
244 int monthlyViews = rs.getInt("monthly_views");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
245 doc.add( new NumericField(MONTHLY_VIEWS_FLD,Field.Store.YES,false).setIntValue(monthlyViews) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
246 rs.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
247 stmt.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
248 indexWriter.addDocument(doc);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
249 } catch(SQLException e) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
250 logger.error("failed to index site "+siteId,e);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
251 String msg = e.getMessage();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
252 if( !(msg.contains("schema") && msg.contains("does not exist")) )
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
253 throw e;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
254 } finally {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
255 con.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
256 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
257 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
258
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
259 private static final Pattern ptn = Pattern.compile("([^,]+),(\\d+|\"[0-9,]+\")");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
260
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
261 private static Map<String,Integer> domainMap()
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
262 throws IOException
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
263 {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
264 Map<String,Integer> map = new HashMap<String,Integer>();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
265 File siteFile = new File("data/sites.csv");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
266 if( !siteFile.exists() )
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
267 return map;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
268 BufferedReader in = new BufferedReader(new FileReader(siteFile));
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
269 String line = in.readLine();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
270 try {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
271 while (!ptn.matcher(line).matches())
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
272 line = in.readLine();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
273 while (true) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
274 if (line == null)
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
275 break;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
276 if (line.length() > 0) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
277 if( line.startsWith(",") )
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
278 break;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
279 Matcher m = ptn.matcher(line);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
280 if (!m.matches())
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
281 throw new RuntimeException(line);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
282 String domain = m.group(1);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
283 String amt = m.group(2);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
284 amt = amt.replaceAll("[,\"]", "");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
285 int sessions = Integer.parseInt(amt);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
286 map.put(domain, sessions);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
287 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
288 line = in.readLine();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
289 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
290 in.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
291 } catch (RuntimeException e) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
292 logger.error("Error in line: " + line, e);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
293 throw e;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
294 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
295 return map;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
296 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
297
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
298 private static void reindex2()
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
299 throws IOException
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
300 {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
301 Map<String,Integer> domainMap = domainMap();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
302 IndexReader reader = IndexReader.open(dir1);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
303 IndexWriter indexWriter = new IndexWriter(dir2,analyzer,true,IndexWriter.MaxFieldLength.LIMITED);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
304 try {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
305 int n = reader.numDocs();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
306 if( n != reader.maxDoc() )
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
307 throw new RuntimeException();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
308 for( int i=0; i<n; i++ ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
309 Document data = reader.document(i);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
310 Document doc = new Document();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
311 doc.add( new Field(SERVER_FLD, data.get(SERVER_FLD), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
312 doc.add( new Field(SITE_FLD, data.get(SITE_FLD), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
313 String domain = data.get(DOMAIN_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
314 doc.add( new Field(DOMAIN_FLD, domain, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
315 doc.add( new Field(SUBJECT_FLD, data.get(SUBJECT_FLD), Field.Store.YES, Field.Index.ANALYZED) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
316 doc.add( new Field(MESSAGE_FLD, data.get(MESSAGE_FLD), Field.Store.YES, Field.Index.ANALYZED) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
317 doc.add( new Field(TYPE_FLD, data.get(TYPE_FLD), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
318 int activity = ((NumericField)data.getFieldable(ACTIVITY_FLD)).getNumericValue().intValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
319 doc.add( new NumericField(ACTIVITY_FLD,Field.Store.YES,true).setIntValue(activity) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
320 doc.add( new Field(EMBARRASSING_FLD, data.get(EMBARRASSING_FLD), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
321 doc.add( new Field(PRIVATE_FLD, data.get(PRIVATE_FLD), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
322 String email = data.get(OWNER_EMAIL_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
323 if( email != null ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
324 doc.add( new Field(OWNER_EMAIL_FLD, email, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
325 String emailDomain = email.substring( email.indexOf('@') + 1 );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
326 doc.add( new Field(OWNER_EMAIL_DOMAIN_FLD, emailDomain, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
327 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
328 int nodeCount = ((NumericField)data.getFieldable(NODE_COUNT_FLD)).getNumericValue().intValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
329 doc.add( new NumericField(NODE_COUNT_FLD,Field.Store.YES,true).setIntValue(nodeCount) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
330 long whenCreated = ((NumericField)data.getFieldable(WHEN_CREATED_FLD)).getNumericValue().longValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
331 doc.add( new NumericField(WHEN_CREATED_FLD,Field.Store.YES,false).setLongValue(whenCreated) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
332 String tweaks = data.get(TWEAKS_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
333 if( tweaks != null ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
334 doc.add( new Field(TWEAKS_FLD, tweaks, Field.Store.YES, Field.Index.NO) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
335 doc.add( new Field(HAS_TWEAKS_FLD, "true", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
336 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
337 int fileCount = ((NumericField)data.getFieldable(FILE_COUNT_FLD)).getNumericValue().intValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
338 doc.add( new NumericField(FILE_COUNT_FLD,Field.Store.YES,false).setIntValue(fileCount) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
339 if( nodeCount > 0 ) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
340 int fileNodeRatio = fileCount*1000/nodeCount;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
341 doc.add( new NumericField(FILE_NODE_RATIO_FLD,Field.Store.NO,true).setIntValue(fileNodeRatio) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
342 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
343 int monthlyViews = ((NumericField)data.getFieldable(MONTHLY_VIEWS_FLD)).getNumericValue().intValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
344 doc.add( new NumericField(MONTHLY_VIEWS_FLD,Field.Store.YES,true).setIntValue(monthlyViews) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
345 Integer sessions = domainMap.get(domain);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
346 if( sessions == null )
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
347 sessions = 0;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
348 doc.add( new NumericField(VALUE_FLD,Field.Store.YES,true).setIntValue(sessions) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
349 indexWriter.addDocument(doc);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
350 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
351 } finally {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
352 indexWriter.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
353 reader.close();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
354 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
355 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
356
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
357 // class here
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
358
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
359 private final Document doc;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
360
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
361 public Site(Document doc) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
362 this.doc = doc;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
363 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
364
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
365 public String serverName() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
366 return doc.get(SERVER_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
367 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
368
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
369 public Server server() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
370 return Server.getServer(serverName());
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
371 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
372
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
373 public String id() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
374 return doc.get(SITE_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
375 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
376
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
377 private static final Set https = new HashSet( Arrays.asList(
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
378 "www.postgresql-archive.org",
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
379 "ffq.38.me.nabble.com"
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
380 ) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
381
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
382 public String url() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
383 String domain = doc.get(DOMAIN_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
384 String scheme = https.contains(domain) ? "https" : "http";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
385 return scheme + "://" + domain + "/";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
386 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
387
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
388 public String subject() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
389 return doc.get(SUBJECT_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
390 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
391
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
392 public String subjectHtml() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
393 return HtmlUtils.htmlEncode(subject());
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
394 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
395
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
396 public String link() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
397 return "<a href=\"" + url() + "\">" + subjectHtml() + "</a>";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
398 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
399
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
400 public String message() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
401 return doc.get(MESSAGE_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
402 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
403
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
404 public String type() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
405 return doc.get(TYPE_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
406 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
407
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
408 public int activity() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
409 NumericField fld = (NumericField)doc.getFieldable(ACTIVITY_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
410 return fld.getNumericValue().intValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
411 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
412
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
413 public int nodeCount() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
414 NumericField fld = (NumericField)doc.getFieldable(NODE_COUNT_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
415 return fld.getNumericValue().intValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
416 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
417
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
418 public Date whenCreated() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
419 NumericField fld = (NumericField)doc.getFieldable(WHEN_CREATED_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
420 return new Date(fld.getNumericValue().longValue());
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
421 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
422
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
423 public boolean isEmbarrassing() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
424 return Boolean.parseBoolean( doc.get(EMBARRASSING_FLD) );
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
425 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
426
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
427 public String tweaks() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
428 return doc.get(TWEAKS_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
429 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
430
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
431 public int fileCount() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
432 NumericField fld = (NumericField)doc.getFieldable(FILE_COUNT_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
433 return fld.getNumericValue().intValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
434 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
435
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
436 public String ownerEmail() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
437 return doc.get(OWNER_EMAIL_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
438 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
439
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
440 public int monthlyViews() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
441 NumericField fld = (NumericField)doc.getFieldable(MONTHLY_VIEWS_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
442 return fld.getNumericValue().intValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
443 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
444
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
445 public int value() {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
446 NumericField fld = (NumericField)doc.getFieldable(VALUE_FLD);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
447 return fld.getNumericValue().intValue();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
448 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
449
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
450 }