In [2020]:
import psycopg2 as pg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
%matplotlib inline

In [2021]:
# 30 May 2015 -> $ heroku pg:pull  HEROKU_POSTGRESQL_COBALT_URL lcp --app lux
!psql lcp --help | head


psql is the PostgreSQL interactive terminal.

Usage:
  psql [OPTION]... [DBNAME [USERNAME]]

General options:
  -c, --command=COMMAND    run only single command (SQL or internal) and exit
  -d, --dbname=DBNAME      database name to connect to (default: "excalibur")
  -f, --file=FILENAME      execute commands from file, then exit
  -l, --list               list available databases, then exit

In [2022]:
!psql -c "\dt" lcp


                List of relations
 Schema |        Name         | Type  |   Owner   
--------+---------------------+-------+-----------
 public | admins              | table | excalibur
 public | comments            | table | excalibur
 public | contributorlinks    | table | excalibur
 public | contributors        | table | excalibur
 public | documents           | table | excalibur
 public | elementals          | table | excalibur
 public | images              | table | excalibur
 public | inclusions          | table | excalibur
 public | items               | table | excalibur
 public | miscs               | table | excalibur
 public | periods             | table | excalibur
 public | petrofabrics        | table | excalibur
 public | petrofabrics_wares  | table | excalibur
 public | petrographics       | table | excalibur
 public | pg_search_documents | table | excalibur
 public | privacylists        | table | excalibur
 public | referencelinks      | table | excalibur
 public | references          | table | excalibur
 public | regionlinks         | table | excalibur
 public | regions             | table | excalibur
 public | schema_migrations   | table | excalibur
 public | shaperelates        | table | excalibur
 public | shapes              | table | excalibur
 public | sites               | table | excalibur
 public | tooltips            | table | excalibur
 public | users               | table | excalibur
 public | viewers             | table | excalibur
 public | voids               | table | excalibur
 public | wares               | table | excalibur
 public | workshoplinks       | table | excalibur
 public | workshops           | table | excalibur
(31 rows)


In [2023]:
!psql -c "\d+ wares" lcp


                                                           Table "public.wares"
     Column     |            Type             |                     Modifiers                      | Storage  | Stats target | Description 
----------------+-----------------------------+----------------------------------------------------+----------+--------------+-------------
 id             | integer                     | not null default nextval('wares_id_seq'::regclass) | plain    |              | 
 name           | character varying(255)      |                                                    | extended |              | 
 origin         | character varying(255)      |                                                    | extended |              | 
 chron_range    | character varying(255)      |                                                    | extended |              | 
 desc           | text                        |                                                    | extended |              | 
 created_at     | timestamp without time zone |                                                    | plain    |              | 
 updated_at     | timestamp without time zone |                                                    | plain    |              | 
 user_id        | integer                     |                                                    | plain    |              | 
 old_region_id  | integer                     |                                                    | plain    |              | 
 privacy_status | integer                     | default 1                                          | plain    |              | 
 contributor_id | integer                     |                                                    | plain    |              | 
 period         | character varying(255)      |                                                    | extended |              | 
 start_year     | integer                     |                                                    | plain    |              | 
 end_year       | integer                     |                                                    | plain    |              | 
 definition     | text                        |                                                    | extended |              | 
Indexes:
    "wares_pkey" PRIMARY KEY, btree (id)



In [2024]:
psql_lcp_connection = pg.connect("dbname=lcp user=excalibur")

In [2025]:
psql_cursor = psql_lcp_connection.cursor()

In [2026]:
psql_cursor.execute("select * from wares")

In [2027]:
psql_cursor.fetchone()


Out[2027]:
(32,
 'Red White and Blue ware',
 None,
 '1800 - 1500 BCE',
 'The ware is characterized by the decoration of alternating wavy and straight lines painted in red, white and blue.',
 datetime.datetime(2012, 9, 17, 21, 28, 23, 515035),
 datetime.datetime(2014, 5, 13, 21, 7, 45, 547331),
 5,
 24,
 1,
 None,
 'Middle Bronze Age IIA-IIB',
 -1800,
 -1500,
 None)

In [2028]:
psql_cursor.close()


In [2029]:
wares_df = pd.io.sql.read_sql("select * from wares", psql_lcp_connection)
wares_df.head(1)


Out[2029]:
id name origin chron_range desc created_at updated_at user_id old_region_id privacy_status contributor_id period start_year end_year definition
0 32 Red White and Blue ware None 1800 - 1500 BCE The ware is characterized by the decoration of... 2012-09-17 21:28:23.515035 2014-05-13 21:07:45.547331 5 24 1 None Middle Bronze Age IIA-IIB -1800 -1500 None

In [2030]:
wares_df.describe()


Out[2030]:
id user_id old_region_id privacy_status start_year end_year
count 224.000000 222.000000 85.000000 224.000000 155.000000 155.000000
mean 148.602679 27.234234 12.729412 1.263393 -1253.290323 193.580645
std 69.727324 36.824263 7.698776 0.654271 10503.905823 848.788066
min 21.000000 1.000000 1.000000 1.000000 -92500.000000 -2500.000000
25% 88.750000 5.000000 8.000000 1.000000 -800.000000 -300.500000
50% 148.500000 13.000000 13.000000 1.000000 0.000000 99.000000
75% 205.250000 28.000000 19.000000 1.000000 435.000000 714.500000
max 271.000000 165.000000 24.000000 4.000000 2450.000000 2250.000000

In [2031]:
wares_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 224 entries, 0 to 223
Data columns (total 15 columns):
id                224 non-null int64
name              224 non-null object
origin            0 non-null object
chron_range       223 non-null object
desc              223 non-null object
created_at        224 non-null datetime64[ns]
updated_at        224 non-null datetime64[ns]
user_id           222 non-null float64
old_region_id     85 non-null float64
privacy_status    224 non-null int64
contributor_id    0 non-null object
period            67 non-null object
start_year        155 non-null float64
end_year          155 non-null float64
definition        11 non-null object
dtypes: datetime64[ns](2), float64(4), int64(2), object(7)
memory usage: 28.0+ KB


In [2032]:
name_start_year_df = wares_df.loc[wares_df['start_year'].notnull(),['name','start_year']]
print name_start_year_df.count()
name_start_year_df.head(1)


name          155
start_year    155
dtype: int64
Out[2032]:
name start_year
0 Red White and Blue ware -1800

In [2033]:
name_start_year_df = name_start_year_df.sort('start_year')

In [2034]:
unique_start_years = name_start_year_df['start_year'].unique()

In [2035]:
sns.boxplot(unique_start_years, names=['start_years'], vert=False)
plt.show()



In [2036]:
sns.boxplot(unique_start_years[1:], names=['start_years'], vert=False)
plt.show()




In [2037]:
start_year_counts = wares_df['start_year'].value_counts()
start_year_counts.head()


Out[2037]:
 1100    13
-200      9
 100      8
 0        7
 800      7
dtype: int64

In [2038]:
plt.plot(wares_df['start_year'])
plt.show()



In [2039]:
plt.plot(wares_df[wares_df['start_year'] != wares_df['start_year'].min()]['start_year'])
plt.show()




In [2040]:
wares_df['period'].value_counts().head(10)


Out[2040]:
                             19
Roman, Byzantine              5
Hellenistic                   3
Iron Age                      3
Early Iron Age I              2
Early Islamic                 2
Mamluk period                 2
Late Roman - Byzantine        1
Middle Bronze Age IIA-IIB     1
Hellenistic, Roman            1
dtype: int64


In [2041]:
desc_wares_df = wares_df[(wares_df['desc'] != "") & (wares_df['desc'].notnull())]
print desc_wares_df.shape
desc_wares_df.head(1)


(150, 15)
Out[2041]:
id name origin chron_range desc created_at updated_at user_id old_region_id privacy_status contributor_id period start_year end_year definition
0 32 Red White and Blue ware None 1800 - 1500 BCE The ware is characterized by the decoration of... 2012-09-17 21:28:23.515035 2014-05-13 21:07:45.547331 5 24 1 None Middle Bronze Age IIA-IIB -1800 -1500 None

In [2042]:
desc_words = nltk.wordpunct_tokenize(str(desc_wares_df['desc'].values))
print "num of words: ", len(desc_words)
print desc_words[:10]


num of words:  14188
['[', "'", 'The', 'ware', 'is', 'characterized', 'by', 'the', 'decoration', 'of']

In [2043]:
desc_vocab = set(desc_words)
print "num of vocab: ", len(desc_vocab)


num of vocab:  1892

In [2044]:
freq_dist = nltk.FreqDist(desc_words)
freq_dist


Out[2044]:
FreqDist({',': 679, '.': 527, 'and': 435, 'the': 398, 'of': 296, '-': 290, '\\': 250, '/': 237, "'": 220, '(': 218, ...})

In [2045]:
freq_dist.tabulate(15)


   ,    .  and  the   of    -    \    /    '    (   is   to    a with   in 
 679  527  435  398  296  290  250  237  220  218  212  208  208  193  150 

In [2046]:
freq_dist['the']


Out[2046]:
398

In [2047]:
from nltk.corpus import stopwords

In [2048]:
stopwords = stopwords.words('english')
stopwords[:10]


Out[2048]:
[u'i',
 u'me',
 u'my',
 u'myself',
 u'we',
 u'our',
 u'ours',
 u'ourselves',
 u'you',
 u'your']

In [2049]:
freq_dist.plot(20)



In [2050]:
freq_dist.plot(20, cumulative=True)




In [2051]:
desc_no_stopwords = [x.lower() for x in desc_words if x.lower() not in stopwords]
print "num of words: ", len(desc_no_stopwords)
print desc_no_stopwords[:10]


num of words:  10598
['[', "'", 'ware', 'characterized', 'decoration', 'alternating', 'wavy', 'straight', 'lines', 'painted']

In [2052]:
import string
desc_nopunc_words = [x for x in desc_no_stopwords if x not in list(string.punctuation)]
print "num of words: ", len(desc_nopunc_words)
print desc_nopunc_words[:10]


num of words:  7929
['ware', 'characterized', 'decoration', 'alternating', 'wavy', 'straight', 'lines', 'painted', 'red', 'white']

In [2053]:
desc_vocab = set(desc_nopunc_words)
print "num of vocab: ", len(desc_vocab)


num of vocab:  1626

In [2054]:
freq_dist = nltk.FreqDist(desc_nopunc_words)
freq_dist


Out[2054]:
FreqDist({'red': 157, '6': 138, 'inclusions': 129, 'brown': 120, '4': 120, '5yr': 102, 'r': 102, 'fabric': 99, '2': 91, 'white': 84, ...})

In [2055]:
freq_dist.plot(20)



In [2056]:
freq_dist.plot(20, cumulative=True)




In [2057]:
desc_bigrams = nltk.bigrams(desc_nopunc_words)

In [2058]:
freq_dist = nltk.FreqDist(desc_bigrams)
freq_dist


Out[2058]:
FreqDist({('2', '5yr'): 36, ('r', 'n'): 33, ('5yr', '6'): 30, ('6', '6'): 29, ('red', 'brown'): 27, (';\\', 'r'): 26, ('5', 'yr'): 24, ('5yr', '5'): 24, ('6', '8'): 24, ('4', '6'): 24, ...})

In [2059]:
freq_dist.plot(20, cumulative=True)



In [2060]:
desc_words_no_nums = [x.lower() for x in desc_nopunc_words if not x.isdigit()]
print "num of words: ", len(desc_words_no_nums)
print desc_words_no_nums[:10]


num of words:  7209
['ware', 'characterized', 'decoration', 'alternating', 'wavy', 'straight', 'lines', 'painted', 'red', 'white']

In [2061]:
desc_vocab = set(desc_words_no_nums)
print "num of vocab: ", len(desc_vocab)


num of vocab:  1586

In [2062]:
desc_bigrams = nltk.bigrams(desc_words_no_nums)

In [2063]:
freq_dist = nltk.FreqDist(desc_bigrams)
freq_dist


Out[2063]:
FreqDist({('r', 'n'): 33, ('red', 'brown'): 27, (';\\', 'r'): 26, ('5yr', '5yr'): 24, ('xe2', 'x80'): 24, ('reddish', 'brown'): 22, ('.\\', 'r'): 19, ('iron', 'age'): 18, ('gray', 'core'): 16, ('n', 'r'): 15, ...})

In [2064]:
plt.figure(figsize=(10,5))
freq_dist.plot(40, cumulative=True)




In [2065]:
descriptions = nltk.Text(desc_words)

In [2066]:
descriptions.count('ware')


Out[2066]:
57

In [2067]:
descriptions.concordance('ware')


Displaying 25 of 69 matches:
                                     ware is characterized by the decoration o
y core ( 2 . 5YR or 5YR 4 / 1 ). The ware gets its name from the thick , slopp
many ( but not all ) vessels in this ware . This slip is matte and ranges in c
thin , reddish - orange , hard fired ware , which is produced from a highly le
 ." Please see Nabataean Semi - Fine ware . \ r \ n \ r \ nNFW is aestheticall
listine forms .' ' Early Roman Jiyeh Ware ( ERJW ) shows close affinity with t
ur ranging from gray to black . This ware contains very few lime inclusions co
thin , reddish - orange , hard fired ware , which is produced from a highly le
phase 3c is of a thicker and coarser ware and the lines that are common in Dek
ck , black paint .' ' White or light ware , with painted decoration in a singl
listine arrival in Canaan . ' ' This ware has a darker fabric than Philistin M
onze Age in Cyprus . The label Plain Ware is used to group a very large array 
sions .' ' Coarse , very low quality ware , with many inclusions and poorly si
thin , reddish - orange , hard fired ware , which is produced from a highly le
reduced to a dark grey - black . The ware is usually very thin - walled , and 
erves the epithet \ xc2 \ xb4brittle ware \ xe2 \ x80 \ x99 ( as coined by Dys
sions .' ' Vessels are made of crude ware , and decorated with painted geometr
it to be similar to that of the Acre Ware , with a light brown 7 . 5 YR 6 / 4 
or was treated similarly to the Acre Ware vessels , with a light - colored sli
thin , reddish - orange , hard fired ware , which is produced from a highly le
 of the vessel . ' ' Coarse handmade ware , generally of desert origin and aff
periods , the term " Handmade Arabah Ware " is more appropriate ( cf . Martin 
se holemouth jars , made from coarse ware , similar to cooking pots of the lat
 by fire .' ' Characteristic of this ware is exterior paint on Aila ( Aqaba ) 
 is exterior paint on Aila ( Aqaba ) ware vessels . The paint can vary in colo

In [2068]:
descriptions.collocations()


Iron Age; Stephan Schmid; carefully manufactured; less carefully; gray
core; manufactured examples; highly levigated; occasionally display;
shell thin; sodium feldspars; reddish yellow; irregular fracture;
painted design; small white; NPFW occasionally; levigated clay;
display inclusions; reddish brown; hard fired; iron oxides

In [2069]:
descriptions.dispersion_plot(['ware', 'red', 'brown', 'white'])



Use pandas


In [2070]:
# character counts
[len(desc) for desc in desc_wares_df['desc']][:10]


Out[2070]:
[114, 555, 564, 311, 335, 126, 255, 529, 112, 158]

In [2071]:
# word counts
[len(desc.split(' ')) for desc in desc_wares_df['desc']][:10]


Out[2071]:
[19, 99, 102, 54, 58, 19, 38, 77, 18, 21]

In [2072]:
plt.figure(figsize=(10,5))
plt.hist([len(desc.split(' ')) for desc in desc_wares_df['desc']], bins=30)
plt.xlabel('description lengths')
plt.ylabel('frequencies')
plt.show()




In [2073]:
desc_wares_df.loc[:,'desc'] = desc_wares_df.loc[:,'desc'].str.lower()

In [2083]:
desc_wares_df['desc'].head()


Out[2083]:
0    the ware is characterized by the decoration of...
4    the fabric is coarse and gritty. the color var...
6    moderately hard, coarse fabric with many small...
7    bassit imperial amphorae fabric (hayes 1991, f...
8    a27.1 bassit imperial amphora fabric black san...
Name: desc, dtype: object

In [2138]:
desc_nopunc_df = desc_wares_df.replace(to_replace={"desc":{"\W".format(stop_words):" "}}, regex=True)
desc_nopunc_df['desc'].head()


Out[2138]:
0    the ware is characterized by the decoration of...
4    the fabric is coarse and gritty  the color var...
6    moderately hard  coarse fabric with many small...
7    bassit imperial amphorae fabric  hayes 1991  f...
8    a27 1 bassit imperial amphora fabric black san...
Name: desc, dtype: object

In [2139]:
stop_words = "|".join([sw.encode('ascii') for sw in stopwords])
stop_words


Out[2139]:
'i|me|my|myself|we|our|ours|ourselves|you|your|yours|yourself|yourselves|he|him|his|himself|she|her|hers|herself|it|its|itself|they|them|their|theirs|themselves|what|which|who|whom|this|that|these|those|am|is|are|was|were|be|been|being|have|has|had|having|do|does|did|doing|a|an|the|and|but|if|or|because|as|until|while|of|at|by|for|with|about|against|between|into|through|during|before|after|above|below|to|from|up|down|in|out|on|off|over|under|again|further|then|once|here|there|when|where|why|how|all|any|both|each|few|more|most|other|some|such|no|nor|not|only|own|same|so|than|too|very|s|t|can|will|just|don|should|now'

In [2142]:
desc_less_df = desc_nopunc_df.replace(to_replace={"desc":{"(^|\s+)({0})\s+".format(stop_words):" "}}, regex=True)
desc_less_df['desc'].head()


Out[2142]:
0     ware characterized the decoration alternating...
4     fabric coarse gritty color varies red  2 5 yr...
6    moderately hard  coarse fabric many small larg...
7    bassit imperial amphorae fabric  hayes 1991  f...
8    a27 1 bassit imperial amphora fabric black san...
Name: desc, dtype: object

In [2143]:
# run twice cause it's late
desc_less_df = desc_less_df.replace(to_replace={"desc":{"(^|\s+)({0})\s+".format(stop_words):" "}}, regex=True)
desc_less_df['desc'].head()


Out[2143]:
0     ware characterized decoration alternating wav...
4     fabric coarse gritty color varies red  2 5 yr...
6    moderately hard  coarse fabric many small larg...
7    bassit imperial amphorae fabric  hayes 1991  f...
8    a27 1 bassit imperial amphora fabric black san...
Name: desc, dtype: object

In [2150]:
# run thrice cause it's late
desc_less_df = desc_less_df.replace(to_replace={"desc":{"(^|\s+)({0})\s+".format(stop_words):" "}}, regex=True)
desc_less_df['desc'].head()


Out[2150]:
0     ware characterized decoration alternating wav...
4     fabric coarse gritty color varies red  2 5 yr...
6    moderately hard  coarse fabric many small larg...
7    bassit imperial amphorae fabric  hayes 1991  f...
8    a27 1 bassit imperial amphora fabric black san...
Name: desc, dtype: object

In [2151]:
desc_less_df[desc_less_df['desc'].str.contains(' the ')]['desc']


Out[2151]:
Series([], Name: desc, dtype: object)

In [2157]:
desc_less_df['desc']


Out[2157]:
0       ware characterized decoration alternating wav...
4       fabric coarse gritty color varies red2 5 yr 5...
6      moderately hardcoarse fabric many small large ...
7      bassit imperial amphorae fabrichayes 1991fig 6...
8      a27 1 bassit imperial amphora fabric black san...
9      vessels grey white color granular texturesmall...
10     denseclean light pinkish brown5yr 7 4 7 5yr 8 ...
12     nfw egg shell thinreddish orangehard fired war...
13     heavy red slip vertical burnishing horizontal ...
14     often red slipped black painted decorationsimp...
15     early roman jiyeh wareerjwshows close affinity...
16     npfw egg shell thinreddish orangehard fired wa...
17     white light ware painted decoration single col...
18      ware darker fabric philistin monochrome devel...
19     whitish light brown fabricoccassionally pinkis...
20     plain wares represent major class undecorated ...
22     coarse low quality ware many inclusions poorly...
23     a37 later bassit amphorae fabricc fm32 1o33 3 ...
24                      hardgrittydark grey black fabric
25     m32 bassit mortaria fabrichayes 1967 red brown...
27     m32 1 hard dark brown fabric irregular fractur...
29     n32 oxidisedred dark red fabric hard irregular...
30     n34 oxidised red fabric hard irregular fractur...
31     whitish pinkish fine well levidated clayself s...
32     hellenistic cypriot group 1matrix color ranges...
33      fabric calcareous sandy sometimes mildly poro...
36     reddish brown brown exteriorgray black core re...
37                                          painted pots
38      cleanmoderately harddense light pinkish brown...
39     vessel designed transported seaelongated conic...
                             ...                        
190    moderately hardextremely coarse fabric many me...
191    hardbrick redcoarse fabric10r 5 4moderate poro...
192    hardfinebuff fabric2 5y 7 4 low porosity5 smal...
193    friable greyish white stonepaste inclusionstra...
195    fine reddish yellow7 5yr6 65yr5 6 yellowish br...
196    vessels generally fully fired pale pink brown ...
197    hardcoarsedark brown fabricsurface7 5yr 4 3n b...
198     typical iron age mortarium straight everted w...
199    dominant style cypro geometric periodattested ...
200    fabric varies colourhardness porosity compared...
201    reworked marl taqiye formationreddish yellowra...
202    hardcoarsegrey fabric10yr 5 1grey middlelight ...
203    hardfineorange red fabric2 5yr 6 65yr 6 6low p...
204    npfw egg shell thinreddish orangehard fired wa...
205     matrix reddishclayeyoptically active silty si...
206     ware classified unique physical attribute pla...
207     tell el yahudiya ware characterized distincti...
208    npfw egg shell thinreddish orangehard fired wa...
209    dominant ware middle late cypro geometric peri...
210     fabric ware made nile silt naturally abundant...
211     npfw egg shell thinreddish orangehard fired w...
212    npfw egg shell thinreddish orangehard fired wa...
213    dominant ware early cypro archaic period attes...
214     thick walledcoarse sandy fabric varies pink r...
215     thick walledcoarse sandy fabric varies buff b...
216     fine thin walled ware colors varying buff lig...
217     thin walledbrick red ware used make cooking v...
220    fpf fabric defined conspicuous coarse rounded ...
221     fabric quite hard fairly gritty many smallmed...
223    open carinated bowls made using brownsoil deri...
Name: desc, dtype: object


want to compare each desc to each other; need to keep them separated, unlike below (maybe just in pandas, go lowercase, replace punc, and replace stopwords with nadda); also, as seen below, remove those double puncs that get through using regex (e.g., ".'")


In [2001]:
desc_words_no_nums


Out[2001]:
['ware',
 'characterized',
 'decoration',
 'alternating',
 'wavy',
 'straight',
 'lines',
 'painted',
 'red',
 'white',
 'blue',
 ".'",
 'fabric',
 'coarse',
 'gritty',
 'color',
 'varies',
 'red',
 'yr',
 'yellowish',
 'red',
 'yr',
 'brown',
 'yr',
 'dark',
 'brown',
 'yr',
 'black',
 'limestone',
 'grits',
 'inclusions',
 'sand',
 'white',
 'spots',
 'due',
 'vitrification',
 'vessels',
 'light',
 'colored',
 'exterior',
 'ranges',
 'white',
 'y',
 'light',
 'red',
 'yr',
 '),',
 'pale',
 'brown',
 'yr',
 'light',
 'reddish',
 'brown',
 'yr',
 ').',
 'lighter',
 'colored',
 'exterior',
 'apparently',
 'achieved',
 'dipping',
 'vessels',
 'salt',
 'water',
 'firing',
 'kiln',
 ".'",
 'moderately',
 'hard',
 'coarse',
 'fabric',
 'many',
 'small',
 'large',
 'white',
 'gray',
 'brown',
 'red',
 'inclusions',
 'color',
 'fabric',
 'slip',
 'highly',
 'variable',
 'unslipped',
 'surfaces',
 'fired',
 'purplish',
 'brown',
 '5yr',
 '5yr',
 'pink',
 '10r',
 '),',
 'orange',
 '5yr',
 '),',
 'pale',
 'yellow',
 '10yr',
 ').',
 'often',
 'wide',
 'gray',
 'core',
 '5yr',
 '5yr',
 ').',
 'ware',
 'gets',
 'name',
 'thick',
 'sloppily',
 'applied',
 'slip',
 'partially',
 'coats',
 'many',
 'vessels',
 'ware',
 'slip',
 'matte',
 'ranges',
 'color',
 'red',
 'brown',
 '10r',
 '5yr',
 '5yr',
 'pink',
 '5yr',
 ").'",
 'bassit',
 'imperial',
 'amphorae',
 'fabric',
 'hayes',
 'fig',
 'cf',
 'reynolds',
 'figs',
 'cat',
 '.)',
 'red',
 'oxidized',
 'fabric',
 'hard',
 'fine',
 'fracture',
 'sandy',
 'feel',
 'common',
 'poorly',
 'sorted',
 'angular',
 'black',
 'sand',
 '1mm',
 'common',
 'lime',
 '2mm',
 'moderate',
 'white',
 'quartz',
 'occasional',
 'red',
 'stone',
 '1mm',
 ".'",
 'a27',
 'bassit',
 'imperial',
 'amphora',
 'fabric',
 'black',
 'sand',
 'variant',
 'c',
 'f',
 'o33',
 'hard',
 'brown',
 'fabric',
 'irregular',
 'fracture',
 'sandy',
 'gritty',
 'feel',
 'inclusions',
 'common',
 'sub',
 'rounded',
 'black',
 'sand',
 'mm',
 'moderate',
 'subanglular',
 'lime',
 '2mm',
 'occasional',
 'quartz',
 '5mm',
 'sandy',
 'matrix',
 'moderated',
 'fine',
 'silver',
 'mica',
 ".'",
 'vessels',
 'grey',
 'white',
 'color',
 'granular',
 'texture',
 'small',
 'occasional',
 'medium',
 'angular',
 'lime',
 'inclusions',
 'quartz',
 'sand',
 ".'",
 'dense',
 'clean',
 'light',
 'pinkish',
 'brown',
 '5yr',
 '5yr',
 '),',
 'slightly',
 'granular',
 'fine',
 'lime',
 'inclusions',
 'fully',
 'fired',
 'matte',
 'semi',
 'lustrous',
 'smooth',
 'orange',
 'red',
 'slip',
 'brushed',
 'usually',
 'covering',
 'entire',
 'interior',
 'upper',
 'exterior',
 'dribbling',
 'onto',
 'lower',
 'wall',
 'nfw',
 'egg',
 'shell',
 'thin',
 'reddish',
 'orange',
 'hard',
 'fired',
 'ware',
 'produced',
 'highly',
 'levigated',
 'clay',
 'later',
 'less',
 'carefully',
 'manufactured',
 'examples',
 'nfw',
 'occasionally',
 'display',
 'inclusions',
 'quartz',
 'limestone',
 'could',
 'indicative',
 'mass',
 'production',
 '."',
 'please',
 'see',
 'nabataean',
 'semi',
 'fine',
 'ware',
 'r',
 'n',
 'r',
 'nnfw',
 'aesthetically',
 'pleasing',
 'produced',
 'mimic',
 'eastern',
 'western',
 'sigillatas',
 'metal',
 'later',
 'glass',
 'table',
 'wares',
 'nfw',
 'represented',
 'forms',
 'e',
 'g',
 'cups',
 'jugs',
 'juglets',
 'jars',
 'unguentaria',
 'bowls',
 'plates',
 ".'",
 'heavy',
 'red',
 'slip',
 'vertical',
 'burnishing',
 'horizontal',
 'black',
 'white',
 'lines',
 'decorating',
 'body',
 'vessel',
 ".'",
 'often',
 'red',
 'slipped',
 'black',
 'painted',
 'decoration',
 'simple',
 'philistine',
 'motifs',
 'e',
 'g',
 'spirals',
 'sometimes',
 'undecorated',
 'keeping',
 'classical',
 'philistine',
 'forms',
 ".'",
 'early',
 'roman',
 'jiyeh',
 'ware',
 'erjw',
 'shows',
 'close',
 'affinity',
 'late',
 'hellenistic',
 'material',
 'yet',
 'diversity',
 'two',
 'groups',
 'colour',
 'admixture',
 'made',
 'identification',
 'group',
 'easy',
 'erjw',
 'characterised',
 'surface',
 'reddish',
 'brown',
 'colour',
 'yr',
 '10r',
 '),',
 'thin',
 'fracture',
 'core',
 'colour',
 'ranging',
 'gray',
 'black',
 'ware',
 'contains',
 'lime',
 'inclusions',
 'compared',
 'lhjw',
 'addition',
 'also',
 'contains',
 'gray',
 'black',
 'particles',
 'however',
 'ceramic',
 'body',
 'contains',
 'less',
 'sand',
 'material',
 'previous',
 'period',
 'also',
 'appears',
 'compact',
 'worth',
 'mention',
 'material',
 'quite',
 'resemblance',
 'classic',
 'beirut',
 'clay',
 'also',
 'mentioned',
 'early',
 'roman',
 'production',
 'identical',
 'utility',
 'vessel',
 'groups',
 'ones',
 'earlier',
 'production',
 'distinguished',
 'however',
 'diminished',
 'diversity',
 'specific',
 'types',
 'forms',
 'utility',
 'vessel',
 'groups',
 'noticeable',
 'npfw',
 'egg',
 'shell',
 'thin',
 'reddish',
 'orange',
 'hard',
 'fired',
 'ware',
 'produced',
 'highly',
 'levigated',
 'clay',
 'later',
 'less',
 'carefully',
 'manufactured',
 'examples',
 'npfw',
 'occasionally',
 'display',
 'inclusions',
 'quartz',
 'limestone',
 'vessel',
 'finished',
 'painted',
 'design',
 'designs',
 'categorized',
 'dekorphase',
 'stephan',
 'schmid',
 'dekorphase',
 '3c',
 'thicker',
 'coarser',
 'ware',
 'lines',
 'common',
 'dekorphases',
 '3a',
 '3b',
 'disappeared',
 'instead',
 'palm',
 'leaf',
 'pomegranate',
 'patterns',
 'applied',
 'thick',
 'black',
 'paint',
 ".'",
 'white',
 'light',
 'ware',
 'painted',
 'decoration',
 'single',
 'color',
 'particular',
 'motifs',
 'linked',
 'philistine',
 'arrival',
 'canaan',
 'ware',
 'darker',
 'fabric',
 'philistin',
 'monochrome',
 'develops',
 'vessels',
 'thick',
 'white',
 'slip',
 'red',
 'black',
 'decoration',
 'motifs',
 'continue',
 'seen',
 'philistine',
 'monochrome',
 'variation',
 'spirals',
 'checkerboards',
 'birds',
 'fish',
 'crosses',
 'common',
 ".'",
 'whitish',
 'light',
 'brown',
 'fabric',
 'occassionally',
 'pinkish',
 'well',
 'levigated',
 'brown',
 'decoration',
 'plain',
 'wares',
 'represent',
 'major',
 'class',
 'undecorated',
 'pottery',
 'late',
 'bronze',
 'age',
 'cyprus',
 'label',
 'plain',
 'ware',
 'used',
 'group',
 'large',
 'array',
 'vessel',
 'types',
 'fabrics',
 'mostly',
 'made',
 'coarse',
 'grained',
 'fabrics',
 'full',
 'inclusions',
 ".'",
 'coarse',
 'low',
 'quality',
 'ware',
 'many',
 'inclusions',
 'poorly',
 'sifted',
 'painted',
 'various',
 'scenes',
 'paint',
 'brown',
 'red',
 'earlier',
 'stages',
 'deriving',
 'earlier',
 'imported',
 'bichrome',
 'late',
 'bronze',
 'age',
 'usually',
 'red',
 'lb',
 'iib',
 '13th',
 'century',
 'bce',
 ').',
 'tradition',
 'continues',
 'iron',
 'age',
 'alongside',
 'philistine',
 'pottery',
 'although',
 'smaller',
 'quantities',
 'painted',
 'decorations',
 'include',
 'division',
 'triglyphs',
 'metopes',
 'ibexes',
 'flanking',
 'palm',
 'tree',
 'animals',
 'plants',
 'simpler',
 'lines',
 'lipstick',
 'red',
 'line',
 'paint',
 'along',
 'rim',
 ".'",
 'a37',
 'later',
 'bassit',
 'amphorae',
 'fabric',
 'c',
 'f',
 'm32',
 'o33',
 'oxidised',
 'red',
 'amphora',
 'fabric',
 'hard',
 'irregular',
 'fracture',
 'ad',
 'coarse',
 'feel',
 'inclusions',
 'common',
 'sub',
 'angular',
 'basalt',
 'mm',
 'common',
 'sub',
 'angular',
 'lime',
 '2mm',
 'occasional',
 'red',
 'stone',
 '.\\',
 'r',
 'n',
 'r',
 'nthe',
 'date',
 'distribution',
 'implies',
 'production',
 'commencing',
 'early',
 '3rd',
 'century',
 'presence',
 'sixth',
 'century',
 'forms',
 'include',
 'early',
 'fifth',
 'century',
 'lra1',
 'copies',
 'also',
 'note',
 'group',
 'late',
 '2nd',
 '?-',
 '3rd',
 'c',
 'ce',
 'xe2',
 'x80',
 'x98transport',
 'dolia',
 'xe2',
 'x80',
 'x99',
 'fabric',
 'hard',
 'gritty',
 'dark',
 'grey',
 'black',
 'fabric',
 'm32',
 'bassit',
 'mortaria',
 'fabric',
 'hayes',
 ').',
 'red',
 'brown',
 'fabric',
 'hard',
 'irregular',
 'fracture',
 'harsh',
 'sandy',
 'feel',
 'inclusions',
 'common',
 'sub',
 'angular',
 'quartz',
 '4mm',
 'common',
 'black',
 'sand',
 '5mm',
 'rounded',
 'lime',
 '1mm',
 ".'",
 'm32',
 'hard',
 'dark',
 'brown',
 'fabric',
 'irregular',
 'fracture',
 'harsh',
 'sandy',
 'feel',
 'inclusions',
 'abundant',
 'basalt',
 '6mm',
 'common',
 'sub',
 'rounded',
 'lime',
 'c',
 '5mm',
 'sandy',
 'matrix',
 ".'",
 'n32',
 'oxidised',
 'red',
 'dark',
 'red',
 'fabric',
 'hard',
 'irregular',
 'fracture',
 'sandy',
 'feel',
 'inclusions',
 'common',
 'quartz',
 '2mm',
 'black',
 'sand',
 '2mm',
 'occasional',
 'lime',
 'red',
 'stone',
 'n34',
 'oxidised',
 'red',
 'fabric',
 'hard',
 'irregular',
 'fracture',
 'sandy',
 'feel',
 'inclusions',
 'common',
 'subrounded',
 'lime',
 '8mm',
 'moderate',
 'black',
 'sand',
 '3mm',
 '.\\',
 'r',
 'n',
 'r',
 'nthis',
 'fabric',
 'would',
 'appear',
 'used',
 'later',
 'dolia',
 ".'",
 'whitish',
 'pinkish',
 'fine',
 'well',
 'levidated',
 'clay',
 'self',
 'slip',
 'dark',
 'brown',
 'reddish',
 'monochrome',
 'decoration',
 '.\\',
 'r',
 'n',
 'hellenistic',
 'cypriot',
 'group',
 'matrix',
 'color',
 'ranges',
 'orange',
 'red',
 'according',
 'firing',
 'temperature',
 'level',
 'grey',
 'black',
 'red',
 'grits',
 'white',
 'yellowish',
 'lime',
 'grits',
 'may',
 'occur',
 'section',
 'matrix',
 'slip',
 'well',
 'sticked',
 'clay',
 'orange',
 'reddish',
 'coloured',
 'often',
 'lime',
 'spots',
 'visible',
 'surface',
 ".'",
 'fabric',
 'calcareous',
 'sandy',
 'sometimes',
 'mildly',
 'porous',
 'main',
 'tempers',
 'characteristic',
 'well',
 'sorted',
 'costal',
 'quartz',
 'sand',
 '(~',
 '%),',
 'poorly',
 'sorted',
 'lime',
 'stone',
 'carmel',
 'kurkar',
 'fragments',
 'sea',
 'shells',
 'r',
 'nthe',
 'clay',
 'crumbly',
 'nature',
 'firing',
 'process',
 'often',
 'leaves',
 'dark',
 'core',
 'could',
 'suggest',
 'medium',
 'level',
 'firing',
 'jars',
 'display',
 'regular',
 'surface',
 'treatment',
 'decoration',
 'r',
 'n',
 'r',
 'nflasks',
 'fabric',
 'calcareous',
 'sandy',
 'sometimes',
 'mildly',
 'porous',
 'main',
 'tempers',
 'well',
 'sorted',
 'coastal',
 'quartz',
 'sand',
 '(~',
 '%),',
 'poorly',
 'sorted',
 'limestone',
 'carmel',
 'kurkar',
 'fragments',
 'sea',
 'shells',
 'r',
 'n',
 'r',
 'nthe',
 'clay',
 'crumbly',
 'nature',
 'firing',
 'process',
 'often',
 'leaves',
 'thin',
 'strip',
 'oxidized',
 'reddish',
 'fabric',
 'leaving',
 'dark',
 'suggests',
 'medium',
 'level',
 'firing',
 'air',
 'reached',
 'inner',
 'part',
 'flask',
 'firing',
 'process',
 '.\\',
 'r',
 'n',
 'r',
 'nflasks',
 'polished',
 'slipped',
 'concentric',
 'decoration',
 'red',
 'red',
 'black',
 'typological',
 'chronological',
 'discussion',
 'flasks',
 'see',
 'gilboa',
 'et',
 'al',
 'r',
 'n',
 ...]